diff --git a/.clang-tidy b/.clang-tidy index de19059d09e..bb63bf2eea6 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -27,6 +27,8 @@ Checks: [ '-bugprone-not-null-terminated-result', '-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged '-bugprone-unchecked-optional-access', + '-bugprone-crtp-constructor-accessibility', + '-bugprone-suspicious-stringview-data-usage', '-cert-dcl16-c', '-cert-dcl37-c', @@ -36,6 +38,7 @@ Checks: [ '-cert-msc51-cpp', '-cert-oop54-cpp', '-cert-oop57-cpp', + '-cert-err33-c', # Misreports on clang-19: it warns about all functions containing 'remove' in the name, not only about the standard library. '-clang-analyzer-optin.performance.Padding', @@ -99,6 +102,7 @@ Checks: [ '-modernize-use-emplace', '-modernize-use-nodiscard', '-modernize-use-trailing-return-type', + '-modernize-use-designated-initializers', '-performance-enum-size', '-performance-inefficient-string-concatenation', diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 06e893fabb3..5088bf90ede 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -13,3 +13,6 @@ # dbms/ → src/ # (though it is unlikely that you will see it in blame) 06446b4f08a142d6f1bc30664c47ded88ab51782 + +# Applied Black formatter for Python code +e6f5a3f98b21ba99cf274a9833797889e020a2b3 diff --git a/.github/actions/release/action.yml b/.github/actions/release/action.yml new file mode 100644 index 00000000000..c3897682a33 --- /dev/null +++ b/.github/actions/release/action.yml @@ -0,0 +1,168 @@ +name: Release + +description: Makes patch releases and creates new release branch + +inputs: + ref: + description: 'Git reference (branch or commit sha) from which to create the release' + required: true + type: string + type: + description: 'The type of release: "new" for a new release or "patch" for a patch release' + required: true + type: choice + options: + - patch + - new + dry-run: + description: 'Dry run' + required: false + default: true + type: boolean + token: + required: true + type: string + +runs: + using: "composite" + steps: + - name: Prepare Release Info + shell: bash + run: | + python3 ./tests/ci/create_release.py --prepare-release-info \ + --ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \ + ${{ inputs.dry-run && '--dry-run' || '' }} + echo "::group::Release Info" + python3 -m json.tool /tmp/release_info.json + echo "::endgroup::" + release_tag=$(jq -r '.release_tag' /tmp/release_info.json) + commit_sha=$(jq -r '.commit_sha' /tmp/release_info.json) + echo "Release Tag: $release_tag" + echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV" + echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV" + - name: Download All Release Artifacts + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/create_release.py --download-packages ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Push Git Tag for the Release + shell: bash + run: | + python3 ./tests/ci/create_release.py --push-release-tag ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Push New Release Branch + if: ${{ inputs.type == 'new' }} + shell: bash + run: | + python3 ./tests/ci/create_release.py --push-new-release-branch ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Bump CH Version and Update Contributors' List + shell: bash + run: | + python3 ./tests/ci/create_release.py --create-bump-version-pr ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Bump Docker versions, Changelog, Security + if: ${{ inputs.type == 
'patch' }} + shell: bash + run: | + git checkout master + python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security" + echo "List versions" + ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv + echo "Update docker version" + ./utils/list-versions/update-docker-version.sh + echo "Generate ChangeLog" + export CI=1 + docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \ + --volume=".:/ClickHouse" clickhouse/style-test \ + /ClickHouse/tests/ci/changelog.py -v --debug-helpers \ + --gh-user-or-token=${{ inputs.token }} --jobs=5 \ + --output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }} + git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md + echo "Generate Security" + python3 ./utils/security-generator/generate_security.py > SECURITY.md + git diff HEAD + - name: Create ChangeLog PR + if: ${{ inputs.type == 'patch' && ! inputs.dry-run }} + uses: peter-evans/create-pull-request@v6 + with: + author: "robot-clickhouse " + token: ${{ inputs.token }} + committer: "robot-clickhouse " + commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} + branch: auto/${{ env.RELEASE_TAG }} + assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher + delete-branch: true + title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }} + labels: do not test + body: | + Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} + ### Changelog category (leave one): + - Not for changelog (changelog entry is not required) + - name: Complete previous steps and Restore git state + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/create_release.py --set-progress-completed + git reset --hard HEAD + git checkout "$GITHUB_REF_NAME" + - name: Create GH Release + shell: bash + if: ${{ inputs.type == 'patch' }} + run: | + python3 ./tests/ci/create_release.py --create-gh-release ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Export TGZ Packages + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/artifactory.py --export-tgz ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Test TGZ Packages + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/artifactory.py --test-tgz ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Export RPM Packages + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/artifactory.py --export-rpm ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Test RPM Packages + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/artifactory.py --test-rpm ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Export Debian Packages + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/artifactory.py --export-debian ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Test Debian Packages + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + python3 ./tests/ci/artifactory.py --test-debian ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Docker clickhouse/clickhouse-server building + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + cd "./tests/ci" + python3 ./create_release.py --set-progress-started --progress "docker server release" + export CHECK_NAME="Docker server image" + python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! 
inputs.dry-run && '--push' || '' }} + python3 ./create_release.py --set-progress-completed + - name: Docker clickhouse/clickhouse-keeper building + if: ${{ inputs.type == 'patch' }} + shell: bash + run: | + cd "./tests/ci" + python3 ./create_release.py --set-progress-started --progress "docker keeper release" + export CHECK_NAME="Docker keeper image" + python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} + python3 ./create_release.py --set-progress-completed + - name: Set current Release progress to Completed with OK + shell: bash + run: | + python3 ./tests/ci/create_release.py --set-progress-started --progress "completed" + python3 ./tests/ci/create_release.py --set-progress-completed + - name: Post Slack Message + if: ${{ !cancelled() }} + shell: bash + run: | + python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run && '--dry-run' || '' }} diff --git a/.github/workflows/auto_release.yml b/.github/workflows/auto_release.yml index f1a6b307b40..457ffacc7a8 100644 --- a/.github/workflows/auto_release.yml +++ b/.github/workflows/auto_release.yml @@ -1,44 +1,110 @@ name: AutoRelease env: - # Force the stdout and stderr streams to be unbuffered PYTHONUNBUFFERED: 1 + DRY_RUN: true concurrency: - group: auto-release + group: release on: # yamllint disable-line rule:truthy - # schedule: - # - cron: '0 10-16 * * 1-5' + # Workflow uses a test bucket for packages and dry run mode (no real releases) + schedule: + - cron: '0 9 * * *' + - cron: '0 15 * * *' workflow_dispatch: + inputs: + dry-run: + description: 'Dry run' + required: false + default: true + type: boolean jobs: - CherryPick: - runs-on: [self-hosted, style-checker-aarch64] + AutoRelease: + runs-on: [self-hosted, release-maker] steps: + - name: DebugInfo + uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6 - name: Set envs - # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings run: | cat >> "$GITHUB_ENV" << 'EOF' - TEMP_PATH=${{runner.temp}}/cherry_pick ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" + - name: Set DRY_RUN for dispatch + if: ${{ github.event_name == 'workflow_dispatch' }} + run: echo "DRY_RUN=${{ github.event.inputs.dry-run }}" >> "$GITHUB_ENV" - name: Check out repository code uses: ClickHouse/checkout@v1 with: - clear-repository: true token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} fetch-depth: 0 - - name: Auto-release + - name: Auto Release Prepare run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 auto_release.py --release-after-days=3 - - name: Cleanup - if: always() + python3 auto_release.py --prepare + echo "::group::Auto Release Info" + python3 -m json.tool /tmp/autorelease_info.json + echo "::endgroup::" + { + echo 'AUTO_RELEASE_PARAMS<> "$GITHUB_ENV" + - name: Post Release Branch statuses + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 auto_release.py --post-status + - name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].release_branch }} + if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && fromJson(env.AUTO_RELEASE_PARAMS).releases[0].ready }} + uses: ./.github/actions/release + with: + ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0].commit_sha }} + type: patch + dry-run: ${{ env.DRY_RUN }} + token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} + - name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].release_branch }} + if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[0] && 
fromJson(env.AUTO_RELEASE_PARAMS).releases[1].ready }} + uses: ./.github/actions/release + with: + ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[1].commit_sha }} + type: patch + dry-run: ${{ env.DRY_RUN }} + token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} + - name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].release_branch }} + if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2] && fromJson(env.AUTO_RELEASE_PARAMS).releases[2].ready }} + uses: ./.github/actions/release + with: + ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[2].commit_sha }} + type: patch + dry-run: ${{ env.DRY_RUN }} + token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} + - name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].release_branch }} + if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3] && fromJson(env.AUTO_RELEASE_PARAMS).releases[3].ready }} + uses: ./.github/actions/release + with: + ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[3].commit_sha }} + type: patch + dry-run: ${{ env.DRY_RUN }} + token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} + - name: Release ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].release_branch }} + if: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4] && fromJson(env.AUTO_RELEASE_PARAMS).releases[4].ready }} + uses: ./.github/actions/release + with: + ref: ${{ fromJson(env.AUTO_RELEASE_PARAMS).releases[4].commit_sha }} + type: patch + dry-run: ${{ env.DRY_RUN }} + token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} + - name: Post Slack Message + if: ${{ !cancelled() }} + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }} + - name: Clean up run: | docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 64c3d2f8342..c602a46d23c 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -36,10 +36,6 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" echo "Testing the main ci directory" python3 -m unittest discover -s . 
-p 'test_*.py' - for dir in *_lambda/; do - echo "Testing $dir" - python3 -m unittest discover -s "$dir" -p 'test_*.py' - done - name: PrepareRunConfig id: runconfig run: | @@ -62,7 +58,7 @@ jobs: BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: ${{ needs.RunConfig.outputs.data }} CompatibilityCheckX86: @@ -245,8 +241,9 @@ jobs: runner_type: stress-tester data: ${{ needs.RunConfig.outputs.data }} FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: + - RunConfig - Builds_Report - FunctionalStatelessTestAsan - FunctionalStatefulTestDebug @@ -261,6 +258,7 @@ jobs: with: clear-repository: true - name: Finish label + if: ${{ !failure() }} run: | cd "$GITHUB_WORKSPACE/tests/ci" # update mergeable check @@ -268,3 +266,10 @@ jobs: # update overall ci report python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} python3 merge_pr.py + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 972aff90195..55644bdd503 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -2,7 +2,6 @@ name: CreateRelease concurrency: group: release - 'on': workflow_dispatch: inputs: @@ -31,136 +30,15 @@ jobs: steps: - name: DebugInfo uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6 - - name: Set envs - # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings - run: | - cat >> "$GITHUB_ENV" << 'EOF' - ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" - echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV" - - name: Download All Release Artifacts - if: ${{ inputs.type == 'patch' }} - run: | - python3 ./tests/ci/create_release.py --infile "$RELEASE_INFO_FILE" --download-packages ${{ inputs.dry-run && '--dry-run' || '' }} - - name: Push Git Tag for the Release - run: | - python3 ./tests/ci/create_release.py --push-release-tag --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} - - name: Push New Release Branch - if: ${{ inputs.type == 'new' }} - run: | - python3 ./tests/ci/create_release.py --push-new-release-branch --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} - - name: Bump CH Version and Update Contributors' List - run: | - python3 ./tests/ci/create_release.py --create-bump-version-pr --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} - - name: Checkout master - run: | - git checkout master - - name: Bump Docker versions, Changelog, Security - if: ${{ inputs.type == 'patch' }} - run: | - [ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1 - echo "List versions" - ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv - echo "Update docker version" - ./utils/list-versions/update-docker-version.sh - echo "Generate ChangeLog" - export CI=1 - docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \ - --volume=".:/ClickHouse" clickhouse/style-test \ - /ClickHouse/tests/ci/changelog.py -v --debug-helpers \ - --gh-user-or-token="$GH_TOKEN" --jobs=5 \ - --output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" 
${{ env.RELEASE_TAG }} - git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md - echo "Generate Security" - python3 ./utils/security-generator/generate_security.py > SECURITY.md - git diff HEAD - - name: Generate ChangeLog - if: ${{ inputs.type == 'patch' && ! inputs.dry-run }} - uses: peter-evans/create-pull-request@v6 + - name: Call Release Action + uses: ./.github/actions/release with: - author: "robot-clickhouse " - token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} - committer: "robot-clickhouse " - commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} - branch: auto/${{ env.RELEASE_TAG }} - assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher - delete-branch: true - title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }} - labels: do not test - body: | - Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} - ### Changelog category (leave one): - - Not for changelog (changelog entry is not required) - - name: Reset changes if Dry-run - if: ${{ inputs.dry-run }} - run: | - git reset --hard HEAD - - name: Checkout back to GITHUB_REF - run: | - git checkout "$GITHUB_REF_NAME" - - name: Create GH Release - if: ${{ inputs.type == 'patch' }} - run: | - python3 ./tests/ci/create_release.py --create-gh-release \ - --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - - - name: Export TGZ Packages - if: ${{ inputs.type == 'patch' }} - run: | - python3 ./tests/ci/artifactory.py --export-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - - name: Test TGZ Packages - if: ${{ inputs.type == 'patch' }} - run: | - python3 ./tests/ci/artifactory.py --test-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - - name: Export RPM Packages - if: ${{ inputs.type == 'patch' }} - run: | - python3 ./tests/ci/artifactory.py --export-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - - name: Test RPM Packages - if: ${{ inputs.type == 'patch' }} - run: | - python3 ./tests/ci/artifactory.py --test-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - - name: Export Debian Packages - if: ${{ inputs.type == 'patch' }} - run: | - python3 ./tests/ci/artifactory.py --export-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - - name: Test Debian Packages - if: ${{ inputs.type == 'patch' }} - run: | - python3 ./tests/ci/artifactory.py --test-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - - name: Docker clickhouse/clickhouse-server building - if: ${{ inputs.type == 'patch' }} - run: | - cd "./tests/ci" - export CHECK_NAME="Docker server image" - python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} - - name: Docker clickhouse/clickhouse-keeper building - if: ${{ inputs.type == 'patch' }} - run: | - cd "./tests/ci" - export CHECK_NAME="Docker keeper image" - python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! 
inputs.dry-run && '--push' || '' }} - - name: Post Slack Message - if: always() - run: | - echo Slack Message + ref: ${{ inputs.ref }} + type: ${{ inputs.type }} + dry-run: ${{ inputs.dry-run }} + token: ${{secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN}} diff --git a/.github/workflows/reusable_docker.yml b/.github/workflows/docker_test_images.yml similarity index 100% rename from .github/workflows/reusable_docker.yml rename to .github/workflows/docker_test_images.yml diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 2a7e6f737ab..7c319da6045 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -33,10 +33,6 @@ jobs: # cd "$GITHUB_WORKSPACE/tests/ci" # echo "Testing the main ci directory" # python3 -m unittest discover -s . -p 'test_*.py' -# for dir in *_lambda/; do -# echo "Testing $dir" -# python3 -m unittest discover -s "$dir" -p 'test_*.py' -# done - name: PrepareRunConfig id: runconfig run: | @@ -58,7 +54,7 @@ jobs: # BuildDockers: # needs: [RunConfig] # if: ${{ !failure() && !cancelled() }} -# uses: ./.github/workflows/reusable_docker.yml +# uses: ./.github/workflows/docker_test_images.yml # with: # data: ${{ needs.RunConfig.outputs.data }} # StyleCheck: @@ -125,34 +121,6 @@ jobs: runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} - MarkReleaseReady: - if: ${{ !failure() && !cancelled() }} - needs: [RunConfig, Builds_1, Builds_2] - runs-on: [self-hosted, style-checker-aarch64] - steps: - - name: Debug - run: | - echo need with different filters - cat << 'EOF' - ${{ toJSON(needs) }} - ${{ toJSON(needs.*.result) }} - no failures ${{ !contains(needs.*.result, 'failure') }} - no skips ${{ !contains(needs.*.result, 'skipped') }} - no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} - EOF - - name: Not ready - # fail the job to be able to restart it - if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }} - run: exit 1 - - name: Check out repository code - if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} - uses: ClickHouse/checkout@v1 - - name: Mark Commit Release Ready - if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 mark_release_ready.py - FinishCheck: if: ${{ !cancelled() }} needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] @@ -164,3 +132,10 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index 01685ee1f5a..4b186241a0e 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -30,10 +30,6 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" echo "Testing the main ci directory" python3 -m unittest discover -s . 
-p 'test_*.py' - for dir in *_lambda/; do - echo "Testing $dir" - python3 -m unittest discover -s "$dir" -p 'test_*.py' - done - name: PrepareRunConfig id: runconfig run: | @@ -51,7 +47,7 @@ jobs: BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }} - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: ${{ needs.RunConfig.outputs.data }} StyleCheck: @@ -97,7 +93,7 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} CheckReadyForMerge: - if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} + if: ${{ !cancelled() }} # Test_2 or Test_3 must not have jobs required for Mergeable check needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1] runs-on: [self-hosted, style-checker-aarch64] @@ -105,6 +101,14 @@ jobs: - name: Check out repository code uses: ClickHouse/checkout@v1 - name: Check and set merge status + if: ${{ needs.StyleCheck.result == 'success' }} run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 3e1c5576e7d..84db3338065 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -40,7 +40,21 @@ jobs: } >> "$GITHUB_OUTPUT" BuildDockers: needs: [RunConfig] - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: "${{ needs.RunConfig.outputs.data }}" set_latest: true + CheckWorkflow: + if: ${{ !cancelled() }} + needs: [RunConfig, BuildDockers] + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index c9f4f858825..c7d7b28af38 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -48,10 +48,6 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" echo "Testing the main ci directory" python3 -m unittest discover -s . 
-p 'test_*.py' - for dir in *_lambda/; do - echo "Testing $dir" - python3 -m unittest discover -s "$dir" -p 'test_*.py' - done - name: PrepareRunConfig id: runconfig run: | @@ -72,7 +68,7 @@ jobs: BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() && toJson(fromJson(needs.RunConfig.outputs.data).docker_data.missing_multi) != '[]' }} - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: ${{ needs.RunConfig.outputs.data }} StyleCheck: @@ -155,7 +151,7 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} CheckReadyForMerge: - if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} + if: ${{ !cancelled() }} # Test_2 or Test_3 must not have jobs required for Mergeable check needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1] runs-on: [self-hosted, style-checker-aarch64] @@ -165,9 +161,17 @@ jobs: with: filter: tree:0 - name: Check and set merge status + if: ${{ needs.StyleCheck.result == 'success' }} run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + python3 ./tests/ci/ci_buddy.py --check-wf-status ################################# Stage Final ################################# # diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 6bf846d7535..bca9ff33cd0 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -33,10 +33,6 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" echo "Testing the main ci directory" python3 -m unittest discover -s . -p 'test_*.py' - for dir in *_lambda/; do - echo "Testing $dir" - python3 -m unittest discover -s "$dir" -p 'test_*.py' - done - name: PrepareRunConfig id: runconfig run: | @@ -57,7 +53,7 @@ jobs: BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_docker.yml + uses: ./.github/workflows/docker_test_images.yml with: data: ${{ needs.RunConfig.outputs.data }} CompatibilityCheckX86: @@ -445,8 +441,9 @@ jobs: runner_type: stress-tester data: ${{ needs.RunConfig.outputs.data }} FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: + - RunConfig - DockerServerImage - DockerKeeperImage - Builds_Report @@ -482,9 +479,18 @@ jobs: with: clear-repository: true - name: Finish label + if: ${{ !failure() }} run: | cd "$GITHUB_WORKSPACE/tests/ci" # update mergeable check python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} # update overall ci report python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/reusable_test.yml b/.github/workflows/reusable_test.yml index c01dd8ca9d4..ca6df5df14e 100644 --- a/.github/workflows/reusable_test.yml +++ b/.github/workflows/reusable_test.yml @@ -102,6 +102,8 @@ jobs: --job-name '${{inputs.test_name}}' \ --run \ --run-command '''${{inputs.run_command}}''' + # shellcheck disable=SC2319 + echo "JOB_EXIT_CODE=$?" 
>> "$GITHUB_ENV" - name: Post run if: ${{ !cancelled() }} run: | diff --git a/.yamllint b/.yamllint index f144e2d47b1..7fb741ec9f4 100644 --- a/.yamllint +++ b/.yamllint @@ -14,3 +14,9 @@ rules: comments: min-spaces-from-content: 1 document-start: disable + colons: disable + indentation: disable + line-length: disable + trailing-spaces: disable + truthy: disable + new-line-at-end-of-file: disable diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp index f20b9daf22e..87f62bf377d 100644 --- a/base/base/cgroupsv2.cpp +++ b/base/base/cgroupsv2.cpp @@ -3,8 +3,9 @@ #include #include -#include +#include +namespace fs = std::filesystem; bool cgroupsV2Enabled() { @@ -13,11 +14,11 @@ bool cgroupsV2Enabled() { /// This file exists iff the host has cgroups v2 enabled. auto controllers_file = default_cgroups_mount / "cgroup.controllers"; - if (!std::filesystem::exists(controllers_file)) + if (!fs::exists(controllers_file)) return false; return true; } - catch (const std::filesystem::filesystem_error &) /// all "underlying OS API errors", typically: permission denied + catch (const fs::filesystem_error &) /// all "underlying OS API errors", typically: permission denied { return false; /// not logging the exception as most callers fall back to cgroups v1 } @@ -33,8 +34,9 @@ bool cgroupsV2MemoryControllerEnabled() /// According to https://docs.kernel.org/admin-guide/cgroup-v2.html, file "cgroup.controllers" defines which controllers are available /// for the current + child cgroups. The set of available controllers can be restricted from level to level using file /// "cgroups.subtree_control". It is therefore sufficient to check the bottom-most nested "cgroup.controllers" file. - std::string cgroup = cgroupV2OfProcess(); - auto cgroup_dir = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup); + fs::path cgroup_dir = cgroupV2PathOfProcess(); + if (cgroup_dir.empty()) + return false; std::ifstream controllers_file(cgroup_dir / "cgroup.controllers"); if (!controllers_file.is_open()) return false; @@ -46,7 +48,7 @@ bool cgroupsV2MemoryControllerEnabled() #endif } -std::string cgroupV2OfProcess() +fs::path cgroupV2PathOfProcess() { #if defined(OS_LINUX) chassert(cgroupsV2Enabled()); @@ -54,17 +56,18 @@ std::string cgroupV2OfProcess() /// A simpler way to get the membership is: std::ifstream cgroup_name_file("/proc/self/cgroup"); if (!cgroup_name_file.is_open()) - return ""; + return {}; /// With cgroups v2, there will be a *single* line with prefix "0::/" /// (see https://docs.kernel.org/admin-guide/cgroup-v2.html) std::string cgroup; std::getline(cgroup_name_file, cgroup); static const std::string v2_prefix = "0::/"; if (!cgroup.starts_with(v2_prefix)) - return ""; + return {}; cgroup = cgroup.substr(v2_prefix.length()); - return cgroup; + /// Note: The 'root' cgroup can have an empty cgroup name, this is valid + return default_cgroups_mount / cgroup; #else - return ""; + return {}; #endif } diff --git a/base/base/cgroupsv2.h b/base/base/cgroupsv2.h index 70219d87cd1..cfb916ff358 100644 --- a/base/base/cgroupsv2.h +++ b/base/base/cgroupsv2.h @@ -1,7 +1,6 @@ #pragma once #include -#include #if defined(OS_LINUX) /// I think it is possible to mount the cgroups hierarchy somewhere else (e.g. when in containers). @@ -16,7 +15,7 @@ bool cgroupsV2Enabled(); /// Assumes that cgroupsV2Enabled() is enabled. bool cgroupsV2MemoryControllerEnabled(); -/// Which cgroup does the process belong to? -/// Returns an empty string if the cgroup cannot be determined. 
+/// Detects which cgroup v2 the process belongs to and returns the filesystem path to the cgroup. +/// Returns an empty path if the cgroup cannot be determined. /// Assumes that cgroupsV2Enabled() is enabled. -std::string cgroupV2OfProcess(); +std::filesystem::path cgroupV2PathOfProcess(); diff --git a/base/base/defines.h b/base/base/defines.h index 2fc54c37bde..5685a6d9833 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -87,10 +87,13 @@ # define ASAN_POISON_MEMORY_REGION(a, b) #endif -#if !defined(ABORT_ON_LOGICAL_ERROR) - #if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) || defined(UNDEFINED_BEHAVIOR_SANITIZER) - #define ABORT_ON_LOGICAL_ERROR - #endif +/// We used to have only ABORT_ON_LOGICAL_ERROR macro, but most of its uses were actually in places where we didn't care about logical errors +/// but wanted to check exactly if the current build type is debug or with sanitizer. This new macro is introduced to fix those places. +#if !defined(DEBUG_OR_SANITIZER_BUILD) +# if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) \ + || defined(UNDEFINED_BEHAVIOR_SANITIZER) +# define DEBUG_OR_SANITIZER_BUILD +# endif #endif /// chassert(x) is similar to assert(x), but: @@ -101,7 +104,7 @@ /// Also it makes sense to call abort() instead of __builtin_unreachable() in debug builds, /// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy) #if !defined(chassert) - #if defined(ABORT_ON_LOGICAL_ERROR) +# if defined(DEBUG_OR_SANITIZER_BUILD) // clang-format off #include namespace DB diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index f47cba9833d..afdb6ba068a 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -23,8 +23,9 @@ std::optional getCgroupsV2MemoryLimit() if (!cgroupsV2MemoryControllerEnabled()) return {}; - std::string cgroup = cgroupV2OfProcess(); - auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup); + std::filesystem::path current_cgroup = cgroupV2PathOfProcess(); + if (current_cgroup.empty()) + return {}; /// Open the bottom-most nested memory limit setting file. If there is no such file at the current /// level, try again at the parent level as memory settings are inherited. diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index bb776fa9506..d69646d3694 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
-SET(VERSION_REVISION 54488) +SET(VERSION_REVISION 54489) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 7) +SET(VERSION_MINOR 8) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH aa023477a9265e403982fca5ee29a714db5133d9) -SET(VERSION_DESCRIBE v24.7.1.1-testing) -SET(VERSION_STRING 24.7.1.1) +SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af) +SET(VERSION_DESCRIBE v24.8.1.1-testing) +SET(VERSION_STRING 24.8.1.1) # end of autochange diff --git a/contrib/grpc b/contrib/grpc index f5b7fdc2dff..1716359d2e2 160000 --- a/contrib/grpc +++ b/contrib/grpc @@ -1 +1 @@ -Subproject commit f5b7fdc2dff09ada06dbf6c75df298fb40f898df +Subproject commit 1716359d2e28d304a250f9df0e6c0ccad03de8db diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 1fbfd29a3bd..191886981c3 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -34,11 +34,7 @@ if (OS_LINUX) # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. - if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") - else() - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") - endif() + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") endif() diff --git a/contrib/jemalloc-cmake/README b/contrib/jemalloc-cmake/README index 91b58448c1f..89d7c818445 100644 --- a/contrib/jemalloc-cmake/README +++ b/contrib/jemalloc-cmake/README @@ -4,3 +4,14 @@ It allows to integrate JEMalloc into CMake project. - Added JEMALLOC_CONFIG_MALLOC_CONF substitution - Add musl support (USE_MUSL) - Also note, that darwin build requires JEMALLOC_PREFIX, while others do not +- JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE should be disabled + + CLOCK_MONOTONIC_COARSE can go backwards after clock_adjtime(ADJ_FREQUENCY) + Let's disable it for now, and this means that CLOCK_MONOTONIC will be used, + and this should not be a problem, since: + - jemalloc does not call clock_gettime() that frequently + - the difference is CLOCK_MONOTONIC 20ns and CLOCK_MONOTONIC_COARSE 4ns + + This can be done with the following command: + + gg JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE | cut -d: -f1 | xargs sed -i 's@#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE@/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */@' diff --git a/contrib/jemalloc-cmake/include_freebsd_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_freebsd_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in index e027ee97d1a..b58d5c1eb8e 100644 --- a/contrib/jemalloc-cmake/include_freebsd_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_freebsd_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in @@ -96,7 +96,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
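The README note above assumes that the extra per-call cost of CLOCK_MONOTONIC over CLOCK_MONOTONIC_COARSE is negligible for jemalloc. A minimal standalone sketch for checking that assumption on a given machine follows; it is not part of this patch or of jemalloc, and the file name and iteration count are arbitrary assumptions.

// clock_bench.cpp (hypothetical helper): compares the per-call cost of the two clock sources.
// Build on Linux with: g++ -O2 clock_bench.cpp -o clock_bench
#include <time.h>
#include <stdio.h>
#include <chrono>

static void bench(clockid_t id, const char * name)
{
    const int iterations = 1000000;  // arbitrary; large enough to average out noise
    timespec ts{};
    auto start = std::chrono::steady_clock::now();
    for (int i = 0; i < iterations; ++i)
        clock_gettime(id, &ts);
    auto stop = std::chrono::steady_clock::now();
    double ns_per_call = std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count()
        / static_cast<double>(iterations);
    printf("%s: %.1f ns per call\n", name, ns_per_call);
}

int main()
{
    bench(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
    bench(CLOCK_MONOTONIC_COARSE, "CLOCK_MONOTONIC_COARSE");
    return 0;
}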
diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in index 597f2d59933..8bfc4b60f3d 100644 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -98,7 +98,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in index 0a72e71496b..d5197c6127f 100644 --- a/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in @@ -98,7 +98,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in index 597f2d59933..8bfc4b60f3d 100644 --- a/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -98,7 +98,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in index 531f2bca0c2..b3f3a489a83 100644 --- a/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in @@ -96,7 +96,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in index d21098a4dcc..cf9acdb0b80 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -98,7 +98,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. 
diff --git a/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in index e08a2bed2ec..729bd82c2fa 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in @@ -99,7 +99,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/libunwind b/contrib/libunwind index d6a01c46327..fe854449e24 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit d6a01c46327e56fd86beb8aaa31591fcd9a6b7df +Subproject commit fe854449e24bedfa26e38465b84374312dbd587f diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 37a2f29afcf..b566e8cb9b3 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -4,9 +4,6 @@ set(LIBUNWIND_CXX_SOURCES "${LIBUNWIND_SOURCE_DIR}/src/libunwind.cpp" "${LIBUNWIND_SOURCE_DIR}/src/Unwind-EHABI.cpp" "${LIBUNWIND_SOURCE_DIR}/src/Unwind-seh.cpp") -if (APPLE) - set(LIBUNWIND_CXX_SOURCES ${LIBUNWIND_CXX_SOURCES} "${LIBUNWIND_SOURCE_DIR}/src/Unwind_AppleExtras.cpp") -endif () set(LIBUNWIND_C_SOURCES "${LIBUNWIND_SOURCE_DIR}/src/UnwindLevel1.c" @@ -32,6 +29,7 @@ set_target_properties(unwind PROPERTIES FOLDER "contrib/libunwind-cmake") target_include_directories(unwind SYSTEM BEFORE PUBLIC $) target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1) +target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_REMEMBER_STACK_ALLOC=1) # NOTE: from this macros sizeof(unw_context_t)/sizeof(unw_cursor_t) is depends, so it should be set always target_compile_definitions(unwind PUBLIC -D_LIBUNWIND_IS_NATIVE_ONLY) diff --git a/contrib/openssl b/contrib/openssl index ee2bb8513b2..66deddc1e53 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit ee2bb8513b28bf86b35404dd17a0e29305ca9e08 +Subproject commit 66deddc1e53cda8706604a019777259372d1bd62 diff --git a/contrib/openssl-cmake/CMakeLists.txt b/contrib/openssl-cmake/CMakeLists.txt index 85de0340996..a9e4a3df698 100644 --- a/contrib/openssl-cmake/CMakeLists.txt +++ b/contrib/openssl-cmake/CMakeLists.txt @@ -1298,7 +1298,6 @@ elseif(ARCH_PPC64LE) ${OPENSSL_SOURCE_DIR}/crypto/camellia/camellia.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/cmll_cbc.c ${OPENSSL_SOURCE_DIR}/crypto/chacha/chacha_enc.c - ${OPENSSL_SOURCE_DIR}/crypto/mem_clr.c ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_enc.c ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_skey.c ${OPENSSL_SOURCE_DIR}/crypto/sha/keccak1600.c diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 2512268be0f..4cac2ee6135 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -26,7 +26,10 @@ RUN apt-get update \ zstd \ --yes --no-install-recommends \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* + && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* \ + && groupadd --system --gid 1000 clickhouse \ + && useradd --system --gid 1000 --uid 1000 -m clickhouse +# ^ For some reason, groupadd and useradd are needed for tests with 'expect', but I don't know, why. 
COPY requirements.txt / RUN pip3 install --no-cache-dir -r /requirements.txt diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 0d975d64010..970bf12a81a 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -9,7 +9,7 @@ trap 'kill $(jobs -pr) ||:' EXIT stage=${stage:-} # Compiler version, normally set by Dockerfile -export LLVM_VERSION=${LLVM_VERSION:-17} +export LLVM_VERSION=${LLVM_VERSION:-18} # A variable to pass additional flags to CMake. # Here we explicitly default it to nothing so that bash doesn't complain about diff --git a/docker/test/libfuzzer/Dockerfile b/docker/test/libfuzzer/Dockerfile index e6eb2ae336e..3ffae0cd921 100644 --- a/docker/test/libfuzzer/Dockerfile +++ b/docker/test/libfuzzer/Dockerfile @@ -33,13 +33,9 @@ RUN apt-get update \ COPY requirements.txt / RUN pip3 install --no-cache-dir -r /requirements.txt -COPY * / - ENV FUZZER_ARGS="-max_total_time=60" SHELL ["/bin/bash", "-c"] -CMD set -o pipefail \ - && timeout -s 9 1h /run_libfuzzer.py 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log # docker run --network=host --volume :/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/libfuzzer diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 1f28d1ac6ea..df85c047649 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -4,6 +4,9 @@ source /setup_export_logs.sh set -e -x +MAX_RUN_TIME=${MAX_RUN_TIME:-3600} +MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 3600 : MAX_RUN_TIME)) + # Choose random timezone for this test run TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)" echo "Choosen random timezone $TZ" @@ -25,7 +28,7 @@ source /utils.lib azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --silent --inMemoryPersistence & ./setup_minio.sh stateful -./mc admin trace clickminio > /test_output/rubbish.log & +./mc admin trace clickminio > /test_output/minio.log & MC_ADMIN_PID=$! 
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml @@ -188,8 +191,8 @@ else ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" - clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" + clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC" clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC" else @@ -197,7 +200,7 @@ else clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, 
OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0" + clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" fi clickhouse-client --query "SHOW TABLES FROM test" @@ -242,7 +245,22 @@ function run_tests() } export -f run_tests -timeout "$MAX_RUN_TIME" bash -c run_tests ||: + +function timeout_with_logging() { + local exit_code=0 + + timeout -s TERM --preserve-status "${@}" || exit_code="${?}" + + if [[ "${exit_code}" -eq "124" ]] + then + echo "The command 'timeout ${*}' has been killed by timeout" + fi + + return $exit_code +} + +TIMEOUT=$((MAX_RUN_TIME - 700)) +timeout_with_logging "$TIMEOUT" bash -c run_tests ||: echo "Files in current directory" ls -la ./ diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index ae7ed1bcd20..670a2420ddf 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -6,18 +6,12 @@ source /setup_export_logs.sh # fail on errors, verbose and export all env variables set -e -x -a -MAX_RUN_TIME=${MAX_RUN_TIME:-7200} -MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 7200 : MAX_RUN_TIME)) +MAX_RUN_TIME=${MAX_RUN_TIME:-9000} +MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 9000 : MAX_RUN_TIME)) USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0} USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0} -RUN_SEQUENTIAL_TESTS_IN_PARALLEL=1 - -if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] || [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then - RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0 -fi - # Choose random timezone for this test run. # # NOTE: that clickhouse-test will randomize session_timezone by itself as well @@ -54,7 +48,7 @@ source /utils.lib /usr/share/clickhouse-test/config/install.sh ./setup_minio.sh stateless -./mc admin trace clickminio > /test_output/rubbish.log & +./mc admin trace clickminio > /test_output/minio.log & MC_ADMIN_PID=$! ./setup_hdfs_minicluster.sh @@ -101,53 +95,6 @@ if [ "$NUM_TRIES" -gt "1" ]; then mkdir -p /var/run/clickhouse-server fi -# Run a CH instance to execute sequential tests on it in parallel with all other tests.
-if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then - mkdir -p /var/run/clickhouse-server3 /etc/clickhouse-server3 /var/lib/clickhouse3 - cp -r -L /etc/clickhouse-server/* /etc/clickhouse-server3/ - - sudo chown clickhouse:clickhouse /var/run/clickhouse-server3 /var/lib/clickhouse3 /etc/clickhouse-server3/ - sudo chown -R clickhouse:clickhouse /etc/clickhouse-server3/* - - function replace(){ - sudo find /etc/clickhouse-server3/ -type f -name '*.xml' -exec sed -i "$1" {} \; - } - - replace "s|9000|19000|g" - replace "s|9440|19440|g" - replace "s|9988|19988|g" - replace "s|9234|19234|g" - replace "s|9181|19181|g" - replace "s|8443|18443|g" - replace "s|9000|19000|g" - replace "s|9181|19181|g" - replace "s|9440|19440|g" - replace "s|9010|19010|g" - replace "s|9004|19004|g" - replace "s|9005|19005|g" - replace "s|9009|19009|g" - replace "s|8123|18123|g" - replace "s|/var/lib/clickhouse/|/var/lib/clickhouse3/|g" - replace "s|/etc/clickhouse-server/|/etc/clickhouse-server3/|g" - # distributed cache - replace "s|10001|10003|g" - replace "s|10002|10004|g" - - sudo -E -u clickhouse /usr/bin/clickhouse server --daemon --config /etc/clickhouse-server3/config.xml \ - --pid-file /var/run/clickhouse-server3/clickhouse-server.pid \ - -- --path /var/lib/clickhouse3/ --logger.stderr /var/log/clickhouse-server/stderr3.log \ - --logger.log /var/log/clickhouse-server/clickhouse-server3.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server3.err.log \ - --tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \ - --prometheus.port 19988 --keeper_server.raft_configuration.server.port 19234 --keeper_server.tcp_port 19181 \ - --mysql_port 19004 --postgresql_port 19005 - - for _ in {1..100} - do - clickhouse-client --port 19000 --query "SELECT 1" && break - sleep 1 - done -fi - # simplest way to forward env variables to server sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid @@ -183,9 +130,6 @@ if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then --keeper_server.tcp_port 29181 --keeper_server.server_id 3 \ --prometheus.port 29988 \ --macros.shard s2 # It doesn't work :( - - MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours) - MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) fi if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then @@ -210,9 +154,6 @@ if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then --keeper_server.tcp_port 19181 --keeper_server.server_id 2 \ --prometheus.port 19988 \ --macros.replica r2 # It doesn't work :( - - MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours) - MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) fi # Wait for the server to start, but not for too long. @@ -223,7 +164,6 @@ do done setup_logs_replication - attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01 function fn_exists() { @@ -284,11 +224,7 @@ function run_tests() else # All other configurations are OK. 
ADDITIONAL_OPTIONS+=('--jobs') - ADDITIONAL_OPTIONS+=('5') - fi - - if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then - ADDITIONAL_OPTIONS+=('--run-sequential-tests-in-parallel') + ADDITIONAL_OPTIONS+=('8') fi if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then @@ -373,9 +309,6 @@ done # Because it's the simplest way to read it when server has crashed. sudo clickhouse stop ||: -if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then - sudo clickhouse stop --pid-path /var/run/clickhouse-server3 ||: -fi if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||: @@ -393,12 +326,6 @@ rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & -if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then - rg -Fa "" /var/log/clickhouse-server3/clickhouse-server.log ||: - rg -A50 -Fa "============" /var/log/clickhouse-server3/stderr.log ||: - zstd --threads=0 < /var/log/clickhouse-server3/clickhouse-server.log > /test_output/clickhouse-server3.log.zst & -fi - data_path_config="--path=/var/lib/clickhouse/" if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then # We need s3 storage configuration (but it's more likely that clickhouse-local will fail for some reason) @@ -419,10 +346,6 @@ if [ $failed_to_save_logs -ne 0 ]; then do clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: - if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then - clickhouse-local --path /var/lib/clickhouse3/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.3.tsv.zst ||: - fi - if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||: @@ -464,12 +387,6 @@ rm -rf /var/lib/clickhouse/data/system/*/ tar -chf /test_output/store.tar /var/lib/clickhouse/store ||: tar -chf /test_output/metadata.tar /var/lib/clickhouse/metadata/*.sql ||: -if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then - rm -rf /var/lib/clickhouse3/data/system/*/ - tar -chf /test_output/store.tar /var/lib/clickhouse3/store ||: - tar -chf /test_output/metadata.tar /var/lib/clickhouse3/metadata/*.sql ||: -fi - if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then rg -Fa "" /var/log/clickhouse-server/clickhouse-server1.log ||: diff --git a/docker/test/stateless/setup_hdfs_minicluster.sh b/docker/test/stateless/setup_hdfs_minicluster.sh index 6671e73562a..15a54f59096 100755 --- a/docker/test/stateless/setup_hdfs_minicluster.sh +++ b/docker/test/stateless/setup_hdfs_minicluster.sh @@ -10,7 +10,7 @@ cd hadoop-3.3.1 export JAVA_HOME=/usr mkdir -p target/test/data chown clickhouse ./target/test/data -sudo -E -u clickhouse bin/mapred minicluster -format -nomr -nnport 12222 >> /test_output/garbage.log 2>&1 & +sudo -E -u clickhouse bin/mapred minicluster -format -nomr -nnport 12222 >> 
/test_output/hdfs_minicluster.log 2>&1 & while ! nc -z localhost 12222; do sleep 1 diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 86467394513..b21114e456f 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -209,9 +209,9 @@ clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDat ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" -clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" -clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" -clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" +clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" +clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" +clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC" clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC" diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index f13bb576f79..babddbd228c 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -17,6 +17,7 @@ ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=18 RUN apt-get update \ && apt-get install \ + sudo \ apt-transport-https \ apt-utils \ ca-certificates \ diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md index 759d97823e2..dd97b6081e8 100644 --- a/docs/en/development/build-cross-riscv.md +++ b/docs/en/development/build-cross-riscv.md @@ -11,7 +11,7 @@ This is for the case when you have Linux machine and want to use it to build `cl The cross-build for RISC-V 64 is based on the [Build instructions](../development/build.md), follow them first. -## Install Clang-16 +## Install Clang-18 Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do ``` diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 0a1fe58b16f..4b11fb71230 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -226,15 +226,59 @@ Other IDEs you can use are [Sublime Text](https://www.sublimetext.com/), [Visual ## Writing Code {#writing-code} -The description of ClickHouse architecture can be found here: https://clickhouse.com/docs/en/development/architecture/ +Below you can find some quick links which may be useful when writing code for ClickHouse: -The Code Style Guide: https://clickhouse.com/docs/en/development/style/ +- [ClickHouse architecture description](https://clickhouse.com/docs/en/development/architecture/). +- [The code style guide](https://clickhouse.com/docs/en/development/style/). 
+- [Adding third-party libraries](https://clickhouse.com/docs/en/development/contrib/#adding-third-party-libraries) +- [Writing tests](https://clickhouse.com/docs/en/development/tests/) +- [List of open issues](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3Ahacktoberfest) -Adding third-party libraries: https://clickhouse.com/docs/en/development/contrib/#adding-third-party-libraries +## Writing Documentation {#writing-documentation} -Writing tests: https://clickhouse.com/docs/en/development/tests/ +As part of every pull request which adds a new feature, it is necessary to write documentation for it. If you'd like to preview your documentation changes the instructions for how to build the documentation page locally are available in the README.md file [here](https://github.com/ClickHouse/clickhouse-docs). When adding a new function to ClickHouse you can use the template below as a guide: -List of tasks: https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3Ahacktoberfest +```markdown +# newFunctionName + +A short description of the function goes here. It should describe briefly what it does and a typical usage case. + +**Syntax** + +\```sql +newFunctionName(arg1, arg2[, arg3]) +\``` + +**Arguments** + +- `arg1` — Description of the argument. [DataType](../data-types/float.md) +- `arg2` — Description of the argument. [DataType](../data-types/float.md) +- `arg3` — Description of optional argument (optional). [DataType](../data-types/float.md) + +**Implementation Details** + +A description of implementation details if relevant. + +**Returned value** + +- Returns {insert what the function returns here}. [DataType](../data-types/float.md) + +**Example** + +Query: + +\```sql +SELECT 'write your example query here'; +\``` + +Response: + +\```response +┌───────────────────────────────────┐ +│ the result of the query │ +└───────────────────────────────────┘ +\``` +``` ## Test Data {#test-data} diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 98e73dec451..6209ef3c8ee 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -15,7 +15,7 @@ You have four options for getting up and running with ClickHouse: - **[ClickHouse Cloud](https://clickhouse.com/cloud/):** The official ClickHouse as a service, - built by, maintained and supported by the creators of ClickHouse - **[Quick Install](#quick-install):** an easy-to-download binary for testing and developing with ClickHouse -- **[Production Deployments](#available-installation-options):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, ARM, or PowerPC64LE CPU architecture +- **[Production Deployments](#available-installation-options):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, modern ARM (ARMv8.2-A up), or PowerPC64LE CPU architecture - **[Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/):** use the official Docker image in Docker Hub ## ClickHouse Cloud diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index e18ff6f1a3f..849b3d4b486 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -185,6 +185,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va - `--format, -f` – Use the specified default format to output the result. - `--vertical, -E` – If specified, use the [Vertical format](../interfaces/formats.md#vertical) by default to output the result. This is the same as `–format=Vertical`. 
In this format, each value is printed on a separate line, which is helpful when displaying wide tables. - `--time, -t` – If specified, print the query execution time to ‘stderr’ in non-interactive mode. +- `--memory-usage` – If specified, print memory usage to ‘stderr’ in non-interactive mode. Possible values: 'none' - do not print memory usage, 'default' - print number of bytes, 'readable' - print memory usage in human-readable format. - `--stacktrace` – If specified, also print the stack trace if an exception occurs. - `--config-file` – The name of the configuration file. - `--secure` – If specified, will connect to server over secure connection (TLS). You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl). @@ -339,7 +340,7 @@ clickhouse-client clickhouse://some_user%40some_mail.com@localhost:9000 Connect to one of the provided hosts: `192.168.1.15`, `192.168.1.25`. ``` bash -clickhouse-client clickhouse://192.168.1.15,192.168.1.25 +clickhouse-client clickhouse://192.168.1.15,192.168.1.25 ``` ### Configuration Files {#configuration_files} @@ -367,7 +368,7 @@ Example of a config file: ``` Or the same config in a YAML format: - + ```yaml user: username password: 'password' diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index b91b794d2d6..9c4c082bc3a 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -67,6 +67,7 @@ The supported formats are: | [Prometheus](#prometheus) | ✗ | ✔ | | [Protobuf](#protobuf) | ✔ | ✔ | | [ProtobufSingle](#protobufsingle) | ✔ | ✔ | +| [ProtobufList](#protobuflist) | ✔ | ✔ | | [Avro](#data-format-avro) | ✔ | ✔ | | [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | | [Parquet](#data-format-parquet) | ✔ | ✔ | @@ -1952,6 +1953,35 @@ SYSTEM DROP FORMAT SCHEMA CACHE FOR Protobuf Same as [Protobuf](#protobuf) but for storing/parsing single Protobuf message without length delimiters. +## ProtobufList {#protobuflist} + +Similar to Protobuf, but rows are represented as a sequence of sub-messages contained in a message with the fixed name "Envelope". + +Usage example: + +``` sql +SELECT * FROM test.table FORMAT ProtobufList SETTINGS format_schema = 'schemafile:MessageType' +``` + +``` bash +cat protobuflist_messages.bin | clickhouse-client --query "INSERT INTO test.table FORMAT ProtobufList SETTINGS format_schema='schemafile:MessageType'" +``` + +where the file `schemafile.proto` looks like this: + +``` protobuf +syntax = "proto3"; +message Envelope { + message MessageType { + string name = 1; + string surname = 2; + uint32 birthDate = 3; + repeated string phoneNumbers = 4; + }; + MessageType row = 1; +}; +``` + ## Avro {#data-format-avro} [Apache Avro](https://avro.apache.org/) is a row-oriented data serialization framework developed within Apache’s Hadoop project. diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index 42820505406..30a8d9569a9 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -6,7 +6,20 @@ sidebar_label: MySQL Interface # MySQL Interface -ClickHouse supports the MySQL wire protocol. This allow tools that are MySQL-compatible to interact with ClickHouse seamlessly (e.g. [Looker Studio](../integrations/data-visualization/looker-studio-and-clickhouse.md)). +ClickHouse supports the MySQL wire protocol.
This allows certain clients that do not have native ClickHouse connectors leverage the MySQL protocol instead, and it has been validated with the following BI tools: + +- [Looker Studio](../integrations/data-visualization/looker-studio-and-clickhouse.md) +- [Tableau Online](../integrations/tableau-online) +- [QuickSight](../integrations/quicksight) + +If you are trying other untested clients or integrations, keep in mind that there could be the following limitations: + +- SSL implementation might not be fully compatible; there could be potential [TLS SNI](https://www.cloudflare.com/learning/ssl/what-is-sni/) issues. +- A particular tool might require dialect features (e.g., MySQL-specific functions or settings) that are not implemented yet. + +If there is a native driver available (e.g., [DBeaver](../integrations/dbeaver)), it is always preferred to use it instead of the MySQL interface. Additionally, while most of the MySQL language clients should work fine, MySQL interface is not guaranteed to be a drop-in replacement for a codebase with existing MySQL queries. + +If your use case involves a particular tool that does not have a native ClickHouse driver, and you would like to use it via the MySQL interface and you found certain incompatibilities - please [create an issue](https://github.com/ClickHouse/ClickHouse/issues) in the ClickHouse repository. ## Enabling the MySQL Interface On ClickHouse Cloud diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 57fea3cca3a..4a818410ebe 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -124,7 +124,7 @@ which is equal to #### Default values for from_env and from_zk attributes -It's possible to set the default value and substitute it only if the environment variable or zookeeper node is set using `replace="1"`. +It's possible to set the default value and substitute it only if the environment variable or zookeeper node is set using `replace="1"` (must be declared before from_env). With previous example, but `MAX_QUERY_SIZE` is unset: @@ -132,7 +132,7 @@ With previous example, but `MAX_QUERY_SIZE` is unset: - 150000 + 150000 diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md index 96477f777a9..ef1e58fd18e 100644 --- a/docs/en/operations/settings/settings-users.md +++ b/docs/en/operations/settings/settings-users.md @@ -22,6 +22,21 @@ Structure of the `users` section: + + + ssh-ed25519 + AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj + + + ecdsa-sha2-nistp256 + AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBNxeV2uN5UY6CUbCzTA1rXfYimKQA5ivNIqxdax4bcMXz4D0nSk2l5E1TkR5mG8EBWtmExSPbcEPJ8V7lyWWbA8= + + + ssh-rsa + AAAAB3NzaC1yc2EAAAADAQABAAABgQCpgqL1SHhPVBOTFlOm0pu+cYBbADzC2jL41sPMawYCJHDyHuq7t+htaVVh2fRgpAPmSEnLEC2d4BEIKMtPK3bfR8plJqVXlLt6Q8t4b1oUlnjb3VPA9P6iGcW7CV1FBkZQEVx8ckOfJ3F+kI5VsrRlEDgiecm/C1VPl0/9M2llW/mPUMaD65cM9nlZgM/hUeBrfxOEqM11gDYxEZm1aRSbZoY4dfdm3vzvpSQ6lrCrkjn3X2aSmaCLcOWJhfBWMovNDB8uiPuw54g3ioZ++qEQMlfxVsqXDGYhXCrsArOVuW/5RbReO79BvXqdssiYShfwo+GhQ0+aLWMIW/jgBkkqx/n7uKLzCMX7b2F+aebRYFh+/QXEj7SnihdVfr9ud6NN3MWzZ1ltfIczlEcFLrLJ1Yq57wW6wXtviWh59WvTWFiPejGjeSjjJyqqB49tKdFVFuBnIU5u/bch2DXVgiAEdQwUrIp1ACoYPq22HFFAYUJrL32y7RxX3PGzuAv3LOc= + + + 0|1 @@ -79,6 +94,24 @@ Password can be specified in plaintext or in SHA256 (hex format). The first line of the result is the password. The second line is the corresponding double SHA1 hash. 
+### username/ssh-key {#user-sshkey} + +This setting allows authenticating with SSH keys. + +Given a SSH key (as generated by `ssh-keygen`) like +``` +ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj john@example.com +``` +The `ssh_key` element is expected to be +``` + + ssh-ed25519 + AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj + +``` + +Substitute `ssh-ed25519` with `ssh-rsa` or `ecdsa-sha2-nistp256` for the other supported algorithms. + ### access_management {#access_management-user-setting} This setting enables or disables using of SQL-driven [access control and account management](../../guides/sre/user-management/index.md#access-control) for the user. diff --git a/docs/en/operations/system-tables/detached_tables.md b/docs/en/operations/system-tables/detached_tables.md new file mode 100644 index 00000000000..f669c8fd251 --- /dev/null +++ b/docs/en/operations/system-tables/detached_tables.md @@ -0,0 +1,35 @@ +--- +slug: /en/operations/system-tables/detached_tables +--- +# detached_tables + +Contains information of each detached table. + +Columns: + +- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in. + +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. + +- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid (Atomic database). + +- `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system. + +- `is_permanently` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag indicates that the table was detached PERMANENTLY. + + +**Example** + +```sql +SELECT * FROM system.detached_tables FORMAT Vertical; +``` + +```text +Row 1: +────── +database: base +table: t1 +uuid: 81b1c20a-b7c6-4116-a2ce-7583fb6b6736 +metadata_path: /var/lib/clickhouse/store/461/461cf698-fd0b-406d-8c01-5d8fd5748a91/t1.sql +is_permanently: 1 +``` diff --git a/docs/en/operations/system-tables/functions.md b/docs/en/operations/system-tables/functions.md index d52bf24f289..da0ef04bd27 100644 --- a/docs/en/operations/system-tables/functions.md +++ b/docs/en/operations/system-tables/functions.md @@ -9,7 +9,6 @@ Columns: - `name` ([String](../../sql-reference/data-types/string.md)) – The name of the function. - `is_aggregate` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Whether the function is an aggregate function. -- `is_deterministic` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md))) - Whether the function is deterministic. - `case_insensitive`, ([UInt8](../../sql-reference/data-types/int-uint.md)) - Whether the function name can be used case-insensitively. - `alias_to`, ([String](../../sql-reference/data-types/string.md)) - The original function name, if the function name is an alias. - `create_query`, ([String](../../sql-reference/data-types/enum.md)) - Unused. diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md index e5a8ce5d18b..1737fdd88b2 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -96,3 +96,22 @@ Result: │ 1 │ [2] │ [[4,1]] │ └───────────┴───────────┴───────────┘ ``` + +## Reading nested subcolumns from Array + +If nested type `T` inside `Array` has subcolumns (for example, if it's a [named tuple](./tuple.md)), you can read its subcolumns from an `Array(T)` type with the same subcolumn names. 
The type of a subcolumn will be `Array` of the type of the original subcolumn. + +**Example** + +```sql +CREATE TABLE t_arr (arr Array(Tuple(field1 UInt32, field2 String))) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO t_arr VALUES ([(1, 'Hello'), (2, 'World')]), ([(3, 'This'), (4, 'is'), (5, 'subcolumn')]); +SELECT arr.field1, toTypeName(arr.field1), arr.field2, toTypeName(arr.field2) from t_arr; +``` + +```text +┌─arr.field1─┬─toTypeName(arr.field1)─┬─arr.field2────────────────┬─toTypeName(arr.field2)─┐ +│ [1,2] │ Array(UInt32) │ ['Hello','World'] │ Array(String) │ +│ [3,4,5] │ Array(UInt32) │ ['This','is','subcolumn'] │ Array(String) │ +└────────────┴────────────────────────┴───────────────────────────┴────────────────────────┘ +``` diff --git a/docs/en/sql-reference/data-types/lowcardinality.md b/docs/en/sql-reference/data-types/lowcardinality.md index 133ac2bd72e..a92f6f4dce6 100644 --- a/docs/en/sql-reference/data-types/lowcardinality.md +++ b/docs/en/sql-reference/data-types/lowcardinality.md @@ -56,7 +56,6 @@ Functions: ## Related content -- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://altinity.com/blog/2020-5-20-reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer) -- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf) - Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema) - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) +- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 8f9fd9abb0d..b7e4094f30e 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -76,7 +76,7 @@ WHERE macro = 'test'; └───────┴──────────────┘ ``` -## FQDN +## fqdn Returns the fully qualified domain name of the ClickHouse server. @@ -86,7 +86,7 @@ Returns the fully qualified domain name of the ClickHouse server. fqdn(); ``` -Aliases: `fullHostName`, 'FQDN'. +Aliases: `fullHostName`, `FQDN`. **Returned value** @@ -2984,6 +2984,66 @@ Result: └─────────┘ ``` +## partitionID + +Computes the [partition ID](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). + +:::note +This function is slow and should not be called for a large number of rows. +::: + +**Syntax** + +```sql +partitionID(x[, y, ...]); +``` + +**Arguments** + +- `x` — Column for which to return the partition ID. +- `y, ...` — Remaining N columns for which to return the partition ID (optional). + +**Returned Value** + +- Partition ID that the row would belong to. [String](../data-types/string.md).
+ +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + i int, + j int +) +ENGINE = MergeTree +PARTITION BY i +ORDER BY tuple(); + +INSERT INTO tab VALUES (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); + +SELECT i, j, partitionID(i), _partition_id FROM tab ORDER BY i, j; +``` + +Result: + +```response +┌─i─┬─j─┬─partitionID(i)─┬─_partition_id─┐ +│ 1 │ 1 │ 1 │ 1 │ +│ 1 │ 2 │ 1 │ 1 │ +│ 1 │ 3 │ 1 │ 1 │ +└───┴───┴────────────────┴───────────────┘ +┌─i─┬─j─┬─partitionID(i)─┬─_partition_id─┐ +│ 2 │ 4 │ 2 │ 2 │ +│ 2 │ 5 │ 2 │ 2 │ +│ 2 │ 6 │ 2 │ 2 │ +└───┴───┴────────────────┴───────────────┘ +``` + + ## shardNum Returns the index of a shard which processes a part of data in a distributed query. Indices are started from `1`. diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index e990023efbc..0bde207dcc9 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -567,12 +567,13 @@ While no standard or recommendation exists for the epoch of Snowflake IDs, imple **Syntax** ``` sql -generateSnowflakeID([expr]) +generateSnowflakeID([expr, [machine_id]]) ``` **Arguments** - `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional. +- `machine_id` — A machine ID, the lowest 10 bits are used. [Int64](../data-types/int-uint.md). Optional. **Returned value** @@ -608,6 +609,16 @@ SELECT generateSnowflakeID(1), generateSnowflakeID(2); └────────────────────────┴────────────────────────┘ ``` +**Example with expression and a machine ID** + +``` +SELECT generateSnowflakeID('expr', 1); + +┌─generateSnowflakeID('expr', 1)─┐ +│ 7201148511606784002 │ +└────────────────────────────────┘ +``` + ## snowflakeToDateTime :::warning diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 34c6016235a..96d9d26977d 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -297,7 +297,7 @@ Algorithm requires the special column in tables. This column: - Must contain an ordered sequence. - Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md). -- Can’t be the only column in the `JOIN` clause. +- For `hash` join algorithm it can’t be the only column in the `JOIN` clause. Syntax `ASOF JOIN ... ON`: @@ -337,7 +337,8 @@ For example, consider the following tables: `ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can’t be joined. 
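For illustration, a query along the following lines could express that closest-match join (a sketch, not part of the patch: it assumes the `user_id` and `ev_time` columns from the example above, and uses `full_sorting_merge`, one of the algorithms that supports `ASOF JOIN` per the note below):

```sql
-- Sketch: choose a join algorithm that supports ASOF JOIN, then join each
-- event in table_1 to the closest earlier-or-equal event in table_2.
SET join_algorithm = 'full_sorting_merge';

SELECT t1.user_id, t1.ev_time, t2.ev_time AS closest_match
FROM table_1 AS t1
ASOF LEFT JOIN table_2 AS t2
    ON t1.user_id = t2.user_id AND t1.ev_time >= t2.ev_time;
```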
:::note -`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. +`ASOF JOIN` is supported only by `hash` and `full_sorting_merge` join algorithms. +It's **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. ::: ## PASTE JOIN Usage diff --git a/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md b/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md index 20dfd35d5db..ca3959d6ed1 100644 --- a/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md +++ b/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/table-functions/azureBlobStorageCluster -sidebar_position: 55 +sidebar_position: 15 sidebar_label: azureBlobStorageCluster title: "azureBlobStorageCluster Table Function" --- diff --git a/docs/ru/getting-started/playground.md b/docs/ru/getting-started/playground.md index c8c987eec9e..a2d5498fb9a 100644 --- a/docs/ru/getting-started/playground.md +++ b/docs/ru/getting-started/playground.md @@ -6,38 +6,38 @@ sidebar_label: Playground # ClickHouse Playground {#clickhouse-playground} -[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. -Several example datasets are available in Playground. +[ClickHouse Playground](https://play.clickhouse.com/play?user=play) позволяет пользователям экспериментировать с ClickHouse, выполняя запросы мгновенно, без необходимости настройки сервера или кластера. +В Playground доступны несколько примеров наборов данных. -You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). +Вы можете выполнять запросы к Playground, используя любой HTTP-клиент, например [curl](https://curl.haxx.se) или [wget](https://www.gnu.org/software/wget/), или настроить соединение, используя драйверы [JDBC](../interfaces/jdbc.md) или [ODBC](../interfaces/odbc.md). Дополнительную информацию о программных продуктах, поддерживающих ClickHouse, можно найти [здесь](../interfaces/index.md). -## Credentials {#credentials} +## Учетные данные {#credentials} -| Parameter | Value | +| Параметр | Значение | |:--------------------|:-----------------------------------| -| HTTPS endpoint | `https://play.clickhouse.com:443/` | -| Native TCP endpoint | `play.clickhouse.com:9440` | -| User | `explorer` or `play` | -| Password | (empty) | +| HTTPS-адрес | `https://play.clickhouse.com:443/` | +| TCP-адрес | `play.clickhouse.com:9440` | +| Пользователь | `explorer` или `play` | +| Пароль | (пусто) | -## Limitations {#limitations} +## Ограничения {#limitations} -The queries are executed as a read-only user. It implies some limitations: +Запросы выполняются от имени пользователя с правами только на чтение. Это предполагает некоторые ограничения: -- DDL queries are not allowed -- INSERT queries are not allowed +- DDL-запросы не разрешены +- INSERT-запросы не разрешены -The service also have quotas on its usage. +Сервис также имеет квоты на использование. 
-## Examples {#examples} +## Примеры {#examples} -HTTPS endpoint example with `curl`: +Пример использования HTTPS-адреса с `curl`: -``` bash +```bash curl "https://play.clickhouse.com/?user=explorer" --data-binary "SELECT 'Play ClickHouse'" ``` -TCP endpoint example with [CLI](../interfaces/cli.md): +Пример использования TCP-адреса с [CLI](../interfaces/cli.md): ``` bash clickhouse client --secure --host play.clickhouse.com --user explorer diff --git a/docs/zh/changelog/index.md b/docs/zh/changelog/index.md index c91d8bcf4d1..cd77a8c03cf 100644 --- a/docs/zh/changelog/index.md +++ b/docs/zh/changelog/index.md @@ -356,7 +356,7 @@ sidebar_label: "\u53D8\u66F4\u65E5\u5FD7" #### 新功能 {#new-feature-1} -- 添加 `deduplicate_blocks_in_dependent_materialized_views` 用于控制具有实例化视图的表中幂等插入的行为的选项。 这个新功能是由Altinity的特殊要求添加到错误修正版本中的。 +- 添加 `deduplicate_blocks_in_dependent_materialized_views` 用于控制具有实例化视图的表中幂等插入的行为的选项。 [#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy) ### ClickHouse版本v20.1.2.4,2020-01-22 {#clickhouse-release-v20-1-2-4-2020-01-22} diff --git a/docs/zh/sql-reference/functions/conditional-functions.md b/docs/zh/sql-reference/functions/conditional-functions.md index e0f8f7a4777..378e4d897c0 100644 --- a/docs/zh/sql-reference/functions/conditional-functions.md +++ b/docs/zh/sql-reference/functions/conditional-functions.md @@ -82,14 +82,14 @@ FROM LEFT_RIGHT SELECT left, right, - if(left < right, 'left is smaller than right', 'right is greater or equal than left') AS is_smaller + if(left < right, 'left is smaller than right', 'right is smaller or equal than left') AS is_smaller FROM LEFT_RIGHT WHERE isNotNull(left) AND isNotNull(right) ┌─left─┬─right─┬─is_smaller──────────────────────────┐ │ 1 │ 3 │ left is smaller than right │ -│ 2 │ 2 │ right is greater or equal than left │ -│ 3 │ 1 │ right is greater or equal than left │ +│ 2 │ 2 │ right is smaller or equal than left │ +│ 3 │ 1 │ right is smaller or equal than left │ └──────┴───────┴─────────────────────────────────────┘ ``` diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 6b3a0b16624..ce3a4659e0e 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -4,6 +4,9 @@ if (USE_CLANG_TIDY) set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}") endif () +set(MAX_LINKER_MEMORY 3500) +include(../cmake/limit_jobs.cmake) + include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake) # The `clickhouse` binary is a multi purpose tool that contains multiple execution modes (client, server, etc.), diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 0126613f797..887c5cb86bc 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -24,9 +24,8 @@ #include #include #include - +#include #include -#include #include #include @@ -49,6 +48,8 @@ #include #include +#include + namespace fs = std::filesystem; using namespace std::literals; @@ -185,6 +186,8 @@ void Client::parseConnectionsCredentials(Poco::Util::AbstractConfiguration & con history_file = home_path + "/" + history_file.substr(1); config.setString("history_file", history_file); } + if (config.has(prefix + ".accept-invalid-certificate")) + config.setBool("accept-invalid-certificate", config.getBool(prefix + ".accept-invalid-certificate")); } if (!connection_name.empty() && !connection_found) @@ -276,6 +279,12 @@ void Client::initialize(Poco::Util::Application & self) else if (config().has("connection")) throw Exception(ErrorCodes::BAD_ARGUMENTS, "--connection was specified, but config does not 
exist"); + if (config().has("accept-invalid-certificate")) + { + config().setString("openSSL.client.invalidCertificateHandler.name", "AcceptCertificateHandler"); + config().setString("openSSL.client.verificationMode", "none"); + } + /** getenv is thread-safe in Linux glibc and in all sane libc implementations. * But the standard does not guarantee that subsequent calls will not rewrite the value by returned pointer. * @@ -730,7 +739,7 @@ bool Client::processWithFuzzing(const String & full_query) } if (auto *q = orig_ast->as()) { - if (auto *setDialect = q->changes.tryGet("dialect"); setDialect && setDialect->safeGet() == "kusto") + if (auto *set_dialect = q->changes.tryGet("dialect"); set_dialect && set_dialect->safeGet() == "kusto") return true; } diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml index d0deb818c1e..c32b63413e9 100644 --- a/programs/client/clickhouse-client.xml +++ b/programs/client/clickhouse-client.xml @@ -1,5 +1,6 @@ + true @@ -72,6 +73,7 @@ Default: "hostname" will be used. --> default + 127.0.0.1 9000 diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp index 639e0957f49..5f2787f8930 100644 --- a/programs/keeper-converter/KeeperConverter.cpp +++ b/programs/keeper-converter/KeeperConverter.cpp @@ -45,16 +45,17 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv) keeper_context->setDigestEnabled(true); keeper_context->setSnapshotDisk(std::make_shared("Keeper-snapshots", options["output-dir"].as())); - DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); + /// TODO(hanfei): support rocksdb here + DB::KeeperMemoryStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); DB::deserializeKeeperStorageFromSnapshotsDir(storage, options["zookeeper-snapshots-dir"].as(), logger); storage.initializeSystemNodes(); DB::deserializeLogsAndApplyToStorage(storage, options["zookeeper-logs-dir"].as(), logger); DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(storage.getZXID(), 1, std::make_shared()); - DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta); + DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta); - DB::KeeperSnapshotManager manager(1, keeper_context); + DB::KeeperSnapshotManager manager(1, keeper_context); auto snp = manager.serializeSnapshotToBuffer(snapshot); auto file_info = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID()); std::cout << "Snapshot serialized to path:" << fs::path(file_info->disk->getPath()) / file_info->path << std::endl; diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 44c2daa33ad..8cf1a4d1999 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -52,6 +52,10 @@ # include #endif +#if USE_GWP_ASAN +# include +#endif + #include #include @@ -639,6 +643,10 @@ try tryLogCurrentException(log, "Disabling cgroup memory observer because of an error during initialization"); } +#if USE_GWP_ASAN + GWPAsan::initFinished(); +#endif + LOG_INFO(log, "Ready for connections."); diff --git a/programs/library-bridge/LibraryBridge.cpp b/programs/library-bridge/LibraryBridge.cpp index 8a07ca57104..d35e7a9b832 100644 --- a/programs/library-bridge/LibraryBridge.cpp +++ b/programs/library-bridge/LibraryBridge.cpp @@ -1,5 +1,7 @@ #include "LibraryBridge.h" +#include + int mainEntryClickHouseLibraryBridge(int argc, char ** argv) { DB::LibraryBridge app; diff --git 
a/programs/library-bridge/LibraryBridgeHandlers.cpp b/programs/library-bridge/LibraryBridgeHandlers.cpp index 8d116e537aa..53f4b31ca06 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.cpp +++ b/programs/library-bridge/LibraryBridgeHandlers.cpp @@ -6,6 +6,7 @@ #include "ExternalDictionaryLibraryHandlerFactory.h" #include +#include #include #include #include diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 46b543e49e9..48e0cca7b73 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -423,6 +423,7 @@ void LocalServer::connect() { connection_parameters = ConnectionParameters(getClientConfiguration(), "localhost"); + /// This is needed for table function input(...). ReadBuffer * in; auto table_file = getClientConfiguration().getString("table-file", "-"); if (table_file == "-" || table_file == "stdin") diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index da2466650a7..0715f358313 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -3,11 +3,12 @@ #include #include -#include -#include -#include +#include #include #include +#include +#include +#include #include #include diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index 5ff985b3d12..a738d5e8c12 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -2,6 +2,7 @@ #if USE_ODBC +#include #include #include #include diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp index cf5acdc4534..2f00e1171bb 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp +++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include "getIdentifierQuote.h" #include "validateODBCConnectionString.h" #include "ODBCPooledConnectionFactory.h" diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index 2cf1576ccd7..404f66eab3b 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/programs/odbc-bridge/ODBCBridge.cpp b/programs/odbc-bridge/ODBCBridge.cpp index e91cc3158df..155e26c8a27 100644 --- a/programs/odbc-bridge/ODBCBridge.cpp +++ b/programs/odbc-bridge/ODBCBridge.cpp @@ -1,5 +1,7 @@ #include "ODBCBridge.h" +#include + int mainEntryClickHouseODBCBridge(int argc, char ** argv) { DB::ODBCBridge app; diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp index c7025ca4311..dfb4333240b 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.cpp +++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp @@ -2,8 +2,10 @@ #if USE_ODBC +#include #include #include +#include #include #include #include diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 3b88bb36954..16888015f8b 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -68,6 +68,7 @@ #include #include #include +#include #include #include #include @@ -2212,6 +2213,7 @@ try CannotAllocateThreadFaultInjector::setFaultProbability(server_settings.cannot_allocate_thread_fault_injection_probability); #if USE_GWP_ASAN + GWPAsan::initFinished(); GWPAsan::setForceSampleProbability(server_settings.gwp_asan_force_sample_probability); #endif @@ -2730,8 +2732,7 @@ void Server::createInterserverServers( void Server::stopServers( 
std::vector & servers, - const ServerType & server_type -) const + const ServerType & server_type) const { LoggerRawPtr log = &logger(); diff --git a/programs/server/Server.h b/programs/server/Server.h index 3f03dd137ef..feaf61f1ffd 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -129,8 +129,7 @@ private: void stopServers( std::vector & servers, - const ServerType & server_type - ) const; + const ServerType & server_type) const; }; } diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index 353358fac65..95a467bbbe5 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.cpp b/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.cpp index 934a8dffd90..5d833796510 100644 --- a/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.cpp +++ b/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.cpp @@ -118,10 +118,10 @@ AggregateFunctionPtr createAggregateFunctionAnalysisOfVariance(const std::string void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory & factory) { AggregateFunctionProperties properties = { .is_order_dependent = false }; - factory.registerFunction("analysisOfVariance", {createAggregateFunctionAnalysisOfVariance, properties}, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("analysisOfVariance", {createAggregateFunctionAnalysisOfVariance, properties}, AggregateFunctionFactory::Case::Insensitive); /// This is widely used term - factory.registerAlias("anova", "analysisOfVariance", AggregateFunctionFactory::CaseInsensitive); + factory.registerAlias("anova", "analysisOfVariance", AggregateFunctionFactory::Case::Insensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionAny.cpp b/src/AggregateFunctions/AggregateFunctionAny.cpp index f727ab04aa9..2bcee0fdd5f 100644 --- a/src/AggregateFunctions/AggregateFunctionAny.cpp +++ b/src/AggregateFunctions/AggregateFunctionAny.cpp @@ -361,9 +361,9 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory & factory) AggregateFunctionProperties default_properties = {.returns_default_when_only_null = false, .is_order_dependent = true}; factory.registerFunction("any", {createAggregateFunctionAny, default_properties}); - factory.registerAlias("any_value", "any", AggregateFunctionFactory::CaseInsensitive); - factory.registerAlias("first_value", "any", AggregateFunctionFactory::CaseInsensitive); + factory.registerAlias("any_value", "any", AggregateFunctionFactory::Case::Insensitive); + factory.registerAlias("first_value", "any", AggregateFunctionFactory::Case::Insensitive); factory.registerFunction("anyLast", {createAggregateFunctionAnyLast, default_properties}); - factory.registerAlias("last_value", "anyLast", AggregateFunctionFactory::CaseInsensitive); + factory.registerAlias("last_value", "anyLast", AggregateFunctionFactory::Case::Insensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionAnyRespectNulls.cpp b/src/AggregateFunctions/AggregateFunctionAnyRespectNulls.cpp index 7275409c151..0b6642bffac 100644 --- a/src/AggregateFunctions/AggregateFunctionAnyRespectNulls.cpp +++ b/src/AggregateFunctions/AggregateFunctionAnyRespectNulls.cpp @@ -221,11 +221,11 @@ void registerAggregateFunctionsAnyRespectNulls(AggregateFunctionFactory & factor = {.returns_default_when_only_null = false, .is_order_dependent = true, .is_window_function = true}; 
factory.registerFunction("any_respect_nulls", {createAggregateFunctionAnyRespectNulls, default_properties_for_respect_nulls}); - factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive); - factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive); + factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::Case::Insensitive); + factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::Case::Insensitive); factory.registerFunction("anyLast_respect_nulls", {createAggregateFunctionAnyLastRespectNulls, default_properties_for_respect_nulls}); - factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::CaseInsensitive); + factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::Case::Insensitive); /// Must happen after registering any and anyLast factory.registerNullsActionTransformation("any", "any_respect_nulls"); diff --git a/src/AggregateFunctions/AggregateFunctionAvg.cpp b/src/AggregateFunctions/AggregateFunctionAvg.cpp index ac6d2cf7fb4..57b14921c99 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.cpp +++ b/src/AggregateFunctions/AggregateFunctionAvg.cpp @@ -46,6 +46,6 @@ AggregateFunctionPtr createAggregateFunctionAvg(const std::string & name, const void registerAggregateFunctionAvg(AggregateFunctionFactory & factory) { - factory.registerFunction("avg", createAggregateFunctionAvg, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("avg", createAggregateFunctionAvg, AggregateFunctionFactory::Case::Insensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionBitwise.cpp b/src/AggregateFunctions/AggregateFunctionBitwise.cpp index 619251552e4..ecced5f3e32 100644 --- a/src/AggregateFunctions/AggregateFunctionBitwise.cpp +++ b/src/AggregateFunctions/AggregateFunctionBitwise.cpp @@ -234,9 +234,9 @@ void registerAggregateFunctionsBitwise(AggregateFunctionFactory & factory) factory.registerFunction("groupBitXor", createAggregateFunctionBitwise); /// Aliases for compatibility with MySQL. 
- factory.registerAlias("BIT_OR", "groupBitOr", AggregateFunctionFactory::CaseInsensitive); - factory.registerAlias("BIT_AND", "groupBitAnd", AggregateFunctionFactory::CaseInsensitive); - factory.registerAlias("BIT_XOR", "groupBitXor", AggregateFunctionFactory::CaseInsensitive); + factory.registerAlias("BIT_OR", "groupBitOr", AggregateFunctionFactory::Case::Insensitive); + factory.registerAlias("BIT_AND", "groupBitAnd", AggregateFunctionFactory::Case::Insensitive); + factory.registerAlias("BIT_XOR", "groupBitXor", AggregateFunctionFactory::Case::Insensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionCorr.cpp b/src/AggregateFunctions/AggregateFunctionCorr.cpp index 2e8ff3af933..02d3a4aa912 100644 --- a/src/AggregateFunctions/AggregateFunctionCorr.cpp +++ b/src/AggregateFunctions/AggregateFunctionCorr.cpp @@ -9,7 +9,7 @@ template using AggregateFunctionCorr = AggregateFunct void registerAggregateFunctionsStatisticsCorr(AggregateFunctionFactory & factory) { - factory.registerFunction("corr", createAggregateFunctionStatisticsBinary, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("corr", createAggregateFunctionStatisticsBinary, AggregateFunctionFactory::Case::Insensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionCount.cpp b/src/AggregateFunctions/AggregateFunctionCount.cpp index 25f991ab693..ad3aee90c37 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.cpp +++ b/src/AggregateFunctions/AggregateFunctionCount.cpp @@ -37,7 +37,7 @@ AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, cons void registerAggregateFunctionCount(AggregateFunctionFactory & factory) { AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = false }; - factory.registerFunction("count", {createAggregateFunctionCount, properties}, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("count", {createAggregateFunctionCount, properties}, AggregateFunctionFactory::Case::Insensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionCovar.cpp b/src/AggregateFunctions/AggregateFunctionCovar.cpp index 9645685483f..e4877a0aed3 100644 --- a/src/AggregateFunctions/AggregateFunctionCovar.cpp +++ b/src/AggregateFunctions/AggregateFunctionCovar.cpp @@ -13,8 +13,8 @@ void registerAggregateFunctionsStatisticsCovar(AggregateFunctionFactory & factor factory.registerFunction("covarPop", createAggregateFunctionStatisticsBinary); /// Synonyms for compatibility. 
- factory.registerAlias("COVAR_SAMP", "covarSamp", AggregateFunctionFactory::CaseInsensitive); - factory.registerAlias("COVAR_POP", "covarPop", AggregateFunctionFactory::CaseInsensitive); + factory.registerAlias("COVAR_SAMP", "covarSamp", AggregateFunctionFactory::Case::Insensitive); + factory.registerAlias("COVAR_POP", "covarPop", AggregateFunctionFactory::Case::Insensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index 6555ae63128..082fa11ca8a 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -6,6 +6,7 @@ #include #include #include +#include static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000; @@ -28,7 +29,7 @@ const String & getAggregateFunctionCanonicalNameIfAny(const String & name) return AggregateFunctionFactory::instance().getCanonicalNameIfAny(name); } -void AggregateFunctionFactory::registerFunction(const String & name, Value creator_with_properties, CaseSensitiveness case_sensitiveness) +void AggregateFunctionFactory::registerFunction(const String & name, Value creator_with_properties, Case case_sensitiveness) { if (creator_with_properties.creator == nullptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionFactory: " @@ -38,7 +39,7 @@ void AggregateFunctionFactory::registerFunction(const String & name, Value creat throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionFactory: the aggregate function name '{}' is not unique", name); - if (case_sensitiveness == CaseInsensitive) + if (case_sensitiveness == Case::Insensitive) { auto key = Poco::toLower(name); if (!case_insensitive_aggregate_functions.emplace(key, creator_with_properties).second) diff --git a/src/AggregateFunctions/AggregateFunctionFactory.h b/src/AggregateFunctions/AggregateFunctionFactory.h index b1dc422fcb0..a5fa3424543 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.h +++ b/src/AggregateFunctions/AggregateFunctionFactory.h @@ -60,7 +60,7 @@ public: void registerFunction( const String & name, Value creator, - CaseSensitiveness case_sensitiveness = CaseSensitive); + Case case_sensitiveness = Case::Sensitive); /// Register how to transform from one aggregate function to other based on NullsAction /// Registers them both ways: diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 16907e0f24f..7034e6373b1 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -840,8 +840,8 @@ void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory) AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; factory.registerFunction("groupArray", { createAggregateFunctionGroupArray, properties }); - factory.registerAlias("array_agg", "groupArray", AggregateFunctionFactory::CaseInsensitive); - factory.registerAliasUnchecked("array_concat_agg", "groupArrayArray", AggregateFunctionFactory::CaseInsensitive); + factory.registerAlias("array_agg", "groupArray", AggregateFunctionFactory::Case::Insensitive); + factory.registerAliasUnchecked("array_concat_agg", "groupArrayArray", AggregateFunctionFactory::Case::Insensitive); factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties }); factory.registerFunction("groupArrayLast", { createAggregateFunctionGroupArray, properties }); } 
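The hunks in this area only switch alias registration from the old `CaseInsensitive` flag to the new `Case::Insensitive` enum; behaviour is unchanged. As a quick illustration (not part of the patch) of what a case-insensitive alias such as `COVAR_SAMP` means at the SQL level, both spellings below should resolve to the same aggregate function:

```sql
-- Hypothetical example: COVAR_SAMP is registered as a case-insensitive
-- alias of covarSamp, so both calls return the same value.
SELECT covarSamp(number, number * 2) AS native,
       COVAR_SAMP(number, number * 2) AS mysql_style
FROM numbers(10);
```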
diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp index 903adf5c547..20acda213da 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp @@ -150,8 +150,18 @@ public: void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { - readVarUInt(this->data(place).version, buf); - this->data(place).value.read(buf); + auto & set = this->data(place).value; + auto & version = this->data(place).version; + size_t size; + readVarUInt(version, buf); + readVarUInt(size, buf); + set.reserve(size); + for (size_t i = 0; i < size; ++i) + { + int key; + readIntBinary(key, buf); + set.insert(key); + } } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override @@ -292,7 +302,7 @@ public: } return new_map; }; - auto new_map = rhs_value.size() < set.size() ? create_new_map(rhs_value, set) : create_new_map(set, rhs_value); + auto new_map = create_new_map(set, rhs_value); set = std::move(new_map); } } @@ -316,11 +326,9 @@ public: readVarUInt(version, buf); readVarUInt(size, buf); set.reserve(size); - UInt64 elem_version; for (size_t i = 0; i < size; ++i) { auto key = readStringBinaryInto(*arena, buf); - readVarUInt(elem_version, buf); set.insert(key); } } diff --git a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp index 1c059dc52aa..5494ef74705 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp @@ -277,7 +277,7 @@ void registerAggregateFunctionGroupConcat(AggregateFunctionFactory & factory) AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; factory.registerFunction("groupConcat", { createAggregateFunctionGroupConcat, properties }); - factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::CaseInsensitive); + factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::Case::Insensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp index 736cca11f1e..04eebe9f485 100644 --- a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp @@ -350,7 +350,7 @@ AggregateFunctionPtr createAggregateFunctionKolmogorovSmirnovTest( void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory) { - factory.registerFunction("kolmogorovSmirnovTest", createAggregateFunctionKolmogorovSmirnovTest, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("kolmogorovSmirnovTest", createAggregateFunctionKolmogorovSmirnovTest, AggregateFunctionFactory::Case::Insensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionSecondMoment.cpp b/src/AggregateFunctions/AggregateFunctionSecondMoment.cpp index 80fbe2511d9..4aa6a0a4429 100644 --- a/src/AggregateFunctions/AggregateFunctionSecondMoment.cpp +++ b/src/AggregateFunctions/AggregateFunctionSecondMoment.cpp @@ -15,11 +15,11 @@ void registerAggregateFunctionsStatisticsSecondMoment(AggregateFunctionFactory & factory.registerFunction("stddevPop", createAggregateFunctionStatisticsUnary); /// Synonyms for compatibility. 
- factory.registerAlias("VAR_SAMP", "varSamp", AggregateFunctionFactory::CaseInsensitive); - factory.registerAlias("VAR_POP", "varPop", AggregateFunctionFactory::CaseInsensitive); - factory.registerAlias("STDDEV_SAMP", "stddevSamp", AggregateFunctionFactory::CaseInsensitive); - factory.registerAlias("STDDEV_POP", "stddevPop", AggregateFunctionFactory::CaseInsensitive); - factory.registerAlias("STD", "stddevPop", AggregateFunctionFactory::CaseInsensitive); + factory.registerAlias("VAR_SAMP", "varSamp", AggregateFunctionFactory::Case::Insensitive); + factory.registerAlias("VAR_POP", "varPop", AggregateFunctionFactory::Case::Insensitive); + factory.registerAlias("STDDEV_SAMP", "stddevSamp", AggregateFunctionFactory::Case::Insensitive); + factory.registerAlias("STDDEV_POP", "stddevPop", AggregateFunctionFactory::Case::Insensitive); + factory.registerAlias("STD", "stddevPop", AggregateFunctionFactory::Case::Insensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionSum.cpp b/src/AggregateFunctions/AggregateFunctionSum.cpp index e393cb6dd38..910e49f388d 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.cpp +++ b/src/AggregateFunctions/AggregateFunctionSum.cpp @@ -72,7 +72,7 @@ AggregateFunctionPtr createAggregateFunctionSum(const std::string & name, const void registerAggregateFunctionSum(AggregateFunctionFactory & factory) { - factory.registerFunction("sum", createAggregateFunctionSum, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("sum", createAggregateFunctionSum, AggregateFunctionFactory::Case::Insensitive); factory.registerFunction("sumWithOverflow", createAggregateFunctionSum); factory.registerFunction("sumKahan", createAggregateFunctionSum); } diff --git a/src/AggregateFunctions/AggregateFunctionTopK.cpp b/src/AggregateFunctions/AggregateFunctionTopK.cpp index 26f756abe18..f949f6b7e4a 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.cpp +++ b/src/AggregateFunctions/AggregateFunctionTopK.cpp @@ -535,9 +535,9 @@ void registerAggregateFunctionTopK(AggregateFunctionFactory & factory) factory.registerFunction("topK", { createAggregateFunctionTopK, properties }); factory.registerFunction("topKWeighted", { createAggregateFunctionTopK, properties }); - factory.registerFunction("approx_top_k", { createAggregateFunctionTopK, properties }, AggregateFunctionFactory::CaseInsensitive); - factory.registerFunction("approx_top_sum", { createAggregateFunctionTopK, properties }, AggregateFunctionFactory::CaseInsensitive); - factory.registerAlias("approx_top_count", "approx_top_k", AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("approx_top_k", { createAggregateFunctionTopK, properties }, AggregateFunctionFactory::Case::Insensitive); + factory.registerFunction("approx_top_sum", { createAggregateFunctionTopK, properties }, AggregateFunctionFactory::Case::Insensitive); + factory.registerAlias("approx_top_count", "approx_top_k", AggregateFunctionFactory::Case::Insensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionsMinMax.cpp b/src/AggregateFunctions/AggregateFunctionsMinMax.cpp index 03e21c15a75..5fa9a4ff5d1 100644 --- a/src/AggregateFunctions/AggregateFunctionsMinMax.cpp +++ b/src/AggregateFunctions/AggregateFunctionsMinMax.cpp @@ -195,8 +195,8 @@ AggregateFunctionPtr createAggregateFunctionMinMax( void registerAggregateFunctionsMinMax(AggregateFunctionFactory & factory) { - factory.registerFunction("min", createAggregateFunctionMinMax, AggregateFunctionFactory::CaseInsensitive); - factory.registerFunction("max", 
createAggregateFunctionMinMax, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("min", createAggregateFunctionMinMax, AggregateFunctionFactory::Case::Insensitive); + factory.registerFunction("max", createAggregateFunctionMinMax, AggregateFunctionFactory::Case::Insensitive); } } diff --git a/src/Analyzer/InDepthQueryTreeVisitor.h b/src/Analyzer/InDepthQueryTreeVisitor.h index 62ddc06659c..67e3b75d8b5 100644 --- a/src/Analyzer/InDepthQueryTreeVisitor.h +++ b/src/Analyzer/InDepthQueryTreeVisitor.h @@ -3,7 +3,6 @@ #include #include -#include #include #include diff --git a/src/Analyzer/Passes/AggregateFunctionOfGroupByKeysPass.cpp b/src/Analyzer/Passes/AggregateFunctionOfGroupByKeysPass.cpp index a419bbcc133..b68635ab953 100644 --- a/src/Analyzer/Passes/AggregateFunctionOfGroupByKeysPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionOfGroupByKeysPass.cpp @@ -10,6 +10,8 @@ #include #include +#include + namespace DB { diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index 098e2858abc..6578a6c5bd7 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -11,6 +11,8 @@ #include #include +#include + namespace DB { diff --git a/src/Analyzer/Passes/ArrayExistsToHasPass.cpp b/src/Analyzer/Passes/ArrayExistsToHasPass.cpp index 62db502e1dc..bf4c1232d79 100644 --- a/src/Analyzer/Passes/ArrayExistsToHasPass.cpp +++ b/src/Analyzer/Passes/ArrayExistsToHasPass.cpp @@ -11,6 +11,8 @@ #include #include +#include + namespace DB { diff --git a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp index 70aa1a41548..f7dd542dbf4 100644 --- a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp +++ b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp @@ -8,6 +8,8 @@ #include #include +#include + namespace DB { diff --git a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp index eb897ef8746..2b2ac95d7b9 100644 --- a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp +++ b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp @@ -22,6 +22,8 @@ #include #include +#include + namespace DB { diff --git a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp index 5951e8fc5ea..f66d092429f 100644 --- a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp +++ b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp @@ -10,6 +10,8 @@ #include #include +#include + #include #include diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp index 23fde89a348..3f9185a07ca 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -11,6 +11,8 @@ #include #include +#include + namespace DB { diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp index 3e2a2055fdb..eb615879893 100644 --- a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp +++ b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp @@ -9,13 +9,14 @@ #include #include #include +#include #include #include #include #include -#include +#include namespace DB diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 90051779a26..b968f43c6a6 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -21,6 +21,8 @@ #include #include 
+#include + namespace DB { diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp index ac4678fb362..0175e304a2b 100644 --- a/src/Analyzer/Passes/FuseFunctionsPass.cpp +++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp b/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp index e259ffa64a6..91f06183523 100644 --- a/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp +++ b/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp @@ -1,6 +1,7 @@ #include #include +#include #include diff --git a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp index beb2247607e..cff98adb6af 100644 --- a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp +++ b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index 29b392a0951..d966f129d08 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 698602ca5bc..5c68bca3a6e 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -8,9 +8,11 @@ #include #include #include +#include #include #include +#include namespace DB { @@ -66,6 +68,41 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes) return nullptr; } +/// Checks if the node is combination of isNull and notEquals functions of two the same arguments +bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs) +{ + QueryTreeNodePtrWithHashSet all_arguments; + for (const auto & node : nodes) + { + const auto * func_node = node->as(); + if (!func_node) + return false; + + const auto & arguments = func_node->getArguments().getNodes(); + if (func_node->getFunctionName() == "isNull" && arguments.size() == 1) + all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0])); + else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2) + { + if (arguments[0]->isEqual(*arguments[1])) + return false; + all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0])); + all_arguments.insert(QueryTreeNodePtrWithHash(arguments[1])); + } + else + return false; + + if (all_arguments.size() > 2) + return false; + } + + if (all_arguments.size() != 2) + return false; + + lhs = all_arguments.begin()->node; + rhs = std::next(all_arguments.begin())->node; + return true; +} + bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value) { const auto * constant_node = node->as(); @@ -211,11 +248,14 @@ private: else if (func_name == "and") { const auto & and_arguments = argument_function->getArguments().getNodes(); - bool all_are_is_null = and_arguments.size() == 2 && isNodeFunction(and_arguments[0], "isNull") && isNodeFunction(and_arguments[1], "isNull"); - if (all_are_is_null) + + QueryTreeNodePtr is_null_lhs_arg; + QueryTreeNodePtr is_null_rhs_arg; + if (matchIsNullOfTwoArgs(and_arguments, is_null_lhs_arg, is_null_rhs_arg)) { - is_null_argument_to_indices[getFunctionArgument(and_arguments.front(), 
0)].push_back(or_operands.size() - 1); - is_null_argument_to_indices[getFunctionArgument(and_arguments.back(), 0)].push_back(or_operands.size() - 1); + is_null_argument_to_indices[is_null_lhs_arg].push_back(or_operands.size() - 1); + is_null_argument_to_indices[is_null_rhs_arg].push_back(or_operands.size() - 1); + continue; } /// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` we can be replaced with `a = b` @@ -615,6 +655,7 @@ private: bool is_any_nullable = false; Tuple args; args.reserve(equals_functions.size()); + DataTypes tuple_element_types; /// first we create tuple from RHS of equals functions for (const auto & equals : equals_functions) { @@ -627,16 +668,18 @@ private: if (const auto * rhs_literal = equals_arguments[1]->as()) { args.push_back(rhs_literal->getValue()); + tuple_element_types.push_back(rhs_literal->getResultType()); } else { const auto * lhs_literal = equals_arguments[0]->as(); assert(lhs_literal); args.push_back(lhs_literal->getValue()); + tuple_element_types.push_back(lhs_literal->getResultType()); } } - auto rhs_node = std::make_shared(std::move(args)); + auto rhs_node = std::make_shared(std::move(args), std::make_shared(std::move(tuple_element_types))); auto in_function = std::make_shared("in"); diff --git a/src/Analyzer/Passes/MultiIfToIfPass.cpp b/src/Analyzer/Passes/MultiIfToIfPass.cpp index c42ea61b34a..0b441032b02 100644 --- a/src/Analyzer/Passes/MultiIfToIfPass.cpp +++ b/src/Analyzer/Passes/MultiIfToIfPass.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index f9066b0363c..3a8b6e75d40 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include diff --git a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp index a8364f1ab7a..feb8bcc792d 100644 --- a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp +++ b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp b/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp index fd8c3e6ee6c..bfb6aa1dd07 100644 --- a/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp +++ b/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp b/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp index a30ad2a1590..fc989003c02 100644 --- a/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp +++ b/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index b13af7f00ae..4f0271b4914 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp 
b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index 05377f8514e..c1adf05ac76 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -6,6 +6,8 @@ #include #include +#include + #include #include diff --git a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp index 524ee807721..a42b4750f47 100644 --- a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp +++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include namespace DB diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index e5ee8a0d0b2..1524629dc81 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -1,19 +1,16 @@ #include -#include -#include - #include #include -#include - -#include - -#include - #include #include #include +#include +#include +#include +#include +#include +#include namespace DB { diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index 947952ac3a8..cb8ef35fc13 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -9,6 +9,8 @@ #include #include +#include + namespace DB { diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index 83705f54b38..3f73322d1fa 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -9,6 +9,8 @@ #include #include +#include + namespace DB { diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index dd083dd5df6..a62b6e56ac5 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -40,6 +40,8 @@ #include #include +#include + #include #include diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index f7919b6422c..4443f83596f 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -62,7 +62,7 @@ namespace ErrorCodes namespace { -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) /** This visitor checks if Query Tree structure is valid after each pass * in debug build. 
@@ -183,7 +183,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node) for (size_t i = 0; i < passes_size; ++i) { passes[i]->run(query_tree_node, current_context); -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) ValidationChecker(passes[i]->getName()).visit(query_tree_node); #endif } @@ -208,7 +208,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node, size_t up_to_pa for (size_t i = 0; i < up_to_pass_index; ++i) { passes[i]->run(query_tree_node, current_context); -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) ValidationChecker(passes[i]->getName()).visit(query_tree_node); #endif } diff --git a/src/Analyzer/Resolve/IdentifierResolveScope.cpp b/src/Analyzer/Resolve/IdentifierResolveScope.cpp index ae363b57047..eb3e2179440 100644 --- a/src/Analyzer/Resolve/IdentifierResolveScope.cpp +++ b/src/Analyzer/Resolve/IdentifierResolveScope.cpp @@ -1,8 +1,9 @@ #include -#include #include #include +#include +#include namespace DB { diff --git a/src/Analyzer/Resolve/IdentifierResolver.cpp b/src/Analyzer/Resolve/IdentifierResolver.cpp index 692a31b66ba..447bf825836 100644 --- a/src/Analyzer/Resolve/IdentifierResolver.cpp +++ b/src/Analyzer/Resolve/IdentifierResolver.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,8 @@ #include #include +#include + namespace DB { namespace ErrorCodes @@ -678,9 +681,33 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromStorage( bool match_full_identifier = false; const auto & identifier_full_name = identifier_without_column_qualifier.getFullName(); - auto it = table_expression_data.column_name_to_column_node.find(identifier_full_name); - bool can_resolve_directly_from_storage = it != table_expression_data.column_name_to_column_node.end(); - if (can_resolve_directly_from_storage && table_expression_data.subcolumn_names.contains(identifier_full_name)) + + ColumnNodePtr result_column_node; + bool can_resolve_directly_from_storage = false; + bool is_subcolumn = false; + if (auto it = table_expression_data.column_name_to_column_node.find(identifier_full_name); it != table_expression_data.column_name_to_column_node.end()) + { + can_resolve_directly_from_storage = true; + is_subcolumn = table_expression_data.subcolumn_names.contains(identifier_full_name); + result_column_node = it->second; + } + /// Check if it's a dynamic subcolumn + else + { + auto [column_name, dynamic_subcolumn_name] = Nested::splitName(identifier_full_name); + auto jt = table_expression_data.column_name_to_column_node.find(column_name); + if (jt != table_expression_data.column_name_to_column_node.end() && jt->second->getColumnType()->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = jt->second->getColumnType()->tryGetSubcolumnType(dynamic_subcolumn_name)) + { + result_column_node = std::make_shared(NameAndTypePair{identifier_full_name, dynamic_subcolumn_type}, jt->second->getColumnSource()); + can_resolve_directly_from_storage = true; + is_subcolumn = true; + } + } + } + + if (can_resolve_directly_from_storage && is_subcolumn) { /** In the case when we have an ARRAY JOIN, we should not resolve subcolumns directly from storage. 
* For example, consider the following SQL query: @@ -696,11 +723,11 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromStorage( if (can_resolve_directly_from_storage) { match_full_identifier = true; - result_expression = it->second; + result_expression = result_column_node; } else { - it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.at(0)); + auto it = table_expression_data.column_name_to_column_node.find(identifier_without_column_qualifier.at(0)); if (it != table_expression_data.column_name_to_column_node.end()) result_expression = it->second; } diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 576c4943ccb..a43e3e62ebc 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -64,6 +64,8 @@ #include #include +#include + namespace ProfileEvents { extern const Event ScalarSubqueriesGlobalCacheHit; @@ -1490,6 +1492,10 @@ void QueryAnalyzer::qualifyColumnNodesWithProjectionNames(const QueryTreeNodes & additional_column_qualification_parts = {table_expression_node->getAlias()}; else if (auto * table_node = table_expression_node->as()) additional_column_qualification_parts = {table_node->getStorageID().getDatabaseName(), table_node->getStorageID().getTableName()}; + else if (auto * query_node = table_expression_node->as(); query_node && query_node->isCTE()) + additional_column_qualification_parts = {query_node->getCTEName()}; + else if (auto * union_node = table_expression_node->as(); union_node && union_node->isCTE()) + additional_column_qualification_parts = {union_node->getCTEName()}; size_t additional_column_qualification_parts_size = additional_column_qualification_parts.size(); const auto & table_expression_data = scope.getTableExpressionDataOrThrow(table_expression_node); @@ -2913,6 +2919,17 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi resolveExpressionNode(in_second_argument, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/); } + + /// Edge case when the first argument of IN is scalar subquery. + auto & in_first_argument = function_in_arguments_nodes[0]; + auto first_argument_type = in_first_argument->getNodeType(); + if (first_argument_type == QueryTreeNodeType::QUERY || first_argument_type == QueryTreeNodeType::UNION) + { + IdentifierResolveScope subquery_scope(in_first_argument, &scope /*parent_scope*/); + subquery_scope.subquery_depth = scope.subquery_depth + 1; + + evaluateScalarSubqueryIfNeeded(in_first_argument, subquery_scope); + } } /// Initialize function argument columns @@ -3410,14 +3427,14 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi function_base = function->build(argument_columns); /// Do not constant fold get scalar functions - bool disable_constant_folding = function_name == "__getScalar" || function_name == "shardNum" || - function_name == "shardCount" || function_name == "hostName" || function_name == "tcpPort"; + // bool disable_constant_folding = function_name == "__getScalar" || function_name == "shardNum" || + // function_name == "shardCount" || function_name == "hostName" || function_name == "tcpPort"; /** If function is suitable for constant folding try to convert it to constant. 
* Example: SELECT plus(1, 1); * Result: SELECT 2; */ - if (function_base->isSuitableForConstantFolding() && !disable_constant_folding) + if (function_base->isSuitableForConstantFolding()) // && !disable_constant_folding) { auto result_type = function_base->getResultType(); auto executable_function = function_base->prepare(argument_columns); @@ -3826,6 +3843,10 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( node->convertToNullable(); break; } + + /// Check parent scopes until find current query scope. + if (scope_ptr->scope_node->getNodeType() == QueryTreeNodeType::QUERY) + break; } } @@ -4103,7 +4124,9 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo auto * column_to_interpolate = interpolate_node_typed.getExpression()->as(); if (!column_to_interpolate) - throw Exception(ErrorCodes::LOGICAL_ERROR, "INTERPOLATE can work only for indentifiers, but {} is found", + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "INTERPOLATE can work only for identifiers, but {} is found", interpolate_node_typed.getExpression()->formatASTForErrorMessage()); auto column_to_interpolate_name = column_to_interpolate->getIdentifier().getFullName(); @@ -4455,9 +4478,8 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table { auto left_table_expression = extractLeftTableExpression(scope_query_node->getJoinTree()); if (table_expression_node.get() == left_table_expression.get() && - scope.joins_count == 1 && - scope.context->getSettingsRef().single_join_prefer_left_table) - table_expression_data.should_qualify_columns = false; + scope.joins_count == 1 && scope.context->getSettingsRef().single_join_prefer_left_table) + table_expression_data.should_qualify_columns = false; } scope.table_expression_node_to_data.emplace(table_expression_node, std::move(table_expression_data)); diff --git a/src/Analyzer/TableNode.cpp b/src/Analyzer/TableNode.cpp index 1c4d022542d..d5a4b06f652 100644 --- a/src/Analyzer/TableNode.cpp +++ b/src/Analyzer/TableNode.cpp @@ -10,6 +10,8 @@ #include +#include + namespace DB { diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index cf4a3f77e34..d10bbd9bd23 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include #include diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 64ab2112326..3ee7a05600e 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 56544312c26..687096d0404 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -1,6 +1,7 @@ #include #if USE_AWS_S3 +#include #include #include #include @@ -54,9 +55,9 @@ namespace S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( settings.auth_settings.region, context->getRemoteHostFilter(), - static_cast(global_settings.s3_max_redirects), - static_cast(global_settings.s3_retry_attempts), - global_settings.enable_s3_requests_logging, + static_cast(local_settings.s3_max_redirects), + static_cast(local_settings.backup_restore_s3_retry_attempts), + local_settings.enable_s3_requests_logging, /* for_disk_s3 = */ false, request_settings.get_request_throttler, request_settings.put_request_throttler, diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp index 
fa8ed5855dd..cd3f963b15d 100644 --- a/src/Backups/BackupUtils.cpp +++ b/src/Backups/BackupUtils.cpp @@ -105,7 +105,7 @@ bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAS auto new_query = query.clone(); adjustCreateQueryForBackup(new_query, global_context); ASTCreateQuery & create = typeid_cast(*new_query); - create.setUUID({}); + create.resetUUIDs(); create.if_not_exists = false; return new_query; }; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 69d9c52ebd9..15a7d7c1eca 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include diff --git a/src/Backups/DDLAdjustingForBackupVisitor.cpp b/src/Backups/DDLAdjustingForBackupVisitor.cpp index 7e5ce91629b..910831195a3 100644 --- a/src/Backups/DDLAdjustingForBackupVisitor.cpp +++ b/src/Backups/DDLAdjustingForBackupVisitor.cpp @@ -1,11 +1,11 @@ #include +#include +#include #include #include #include -#include -#include - #include +#include namespace DB diff --git a/src/Backups/RestoreCoordinationLocal.cpp b/src/Backups/RestoreCoordinationLocal.cpp index f51d6c0c1d8..9fe22f874b4 100644 --- a/src/Backups/RestoreCoordinationLocal.cpp +++ b/src/Backups/RestoreCoordinationLocal.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -67,7 +68,7 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer auto it = create_query_uuids.find(query_str); if (it != create_query_uuids.end()) { - create_query.setUUID(it->second); + it->second.copyToQuery(create_query); return true; } return false; @@ -79,7 +80,8 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer return; } - auto new_uuids = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true); + CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true}; + new_uuids.copyToQuery(create_query); { std::lock_guard lock{mutex}; diff --git a/src/Backups/RestoreCoordinationLocal.h b/src/Backups/RestoreCoordinationLocal.h index 5e51b719d63..35f93574b68 100644 --- a/src/Backups/RestoreCoordinationLocal.h +++ b/src/Backups/RestoreCoordinationLocal.h @@ -1,16 +1,17 @@ #pragma once #include -#include +#include +#include #include #include #include -namespace Poco { class Logger; } - namespace DB { +class ASTCreateQuery; + /// Implementation of the IRestoreCoordination interface performing coordination in memory. 
class RestoreCoordinationLocal : public IRestoreCoordination @@ -55,7 +56,7 @@ private: std::set> acquired_tables_in_replicated_databases; std::unordered_set acquired_data_in_replicated_tables; - std::unordered_map create_query_uuids; + std::unordered_map create_query_uuids; std::unordered_set acquired_data_in_keeper_map_tables; mutable std::mutex mutex; diff --git a/src/Backups/RestoreCoordinationRemote.cpp b/src/Backups/RestoreCoordinationRemote.cpp index 84106737fc9..44214d00be5 100644 --- a/src/Backups/RestoreCoordinationRemote.cpp +++ b/src/Backups/RestoreCoordinationRemote.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -269,7 +270,8 @@ bool RestoreCoordinationRemote::acquireInsertingDataForKeeperMap(const String & void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_query) { String query_str = serializeAST(create_query); - String new_uuids_str = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true).toString(); + CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true}; + String new_uuids_str = new_uuids.toString(); auto holder = with_retries.createRetriesControlHolder("generateUUIDForTable"); holder.retries_ctl.retryLoop( @@ -281,11 +283,14 @@ void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_que Coordination::Error res = zk->tryCreate(path, new_uuids_str, zkutil::CreateMode::Persistent); if (res == Coordination::Error::ZOK) + { + new_uuids.copyToQuery(create_query); return; + } if (res == Coordination::Error::ZNODEEXISTS) { - create_query.setUUID(ASTCreateQuery::UUIDs::fromString(zk->get(path))); + CreateQueryUUIDs::fromString(zk->get(path)).copyToQuery(create_query); return; } diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 454a0468e9f..3056f9fe421 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -23,8 +23,9 @@ #include #include #include -#include +#include +#include #include #include diff --git a/src/Backups/WithRetries.cpp b/src/Backups/WithRetries.cpp index 66851fa42ce..181e6331ac9 100644 --- a/src/Backups/WithRetries.cpp +++ b/src/Backups/WithRetries.cpp @@ -1,5 +1,7 @@ -#include #include +#include + +#include namespace DB { diff --git a/src/Bridge/IBridge.cpp b/src/Bridge/IBridge.cpp index c25d7bd2fed..de48a4f2b84 100644 --- a/src/Bridge/IBridge.cpp +++ b/src/Bridge/IBridge.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/src/BridgeHelper/LibraryBridgeHelper.cpp b/src/BridgeHelper/LibraryBridgeHelper.cpp index 84bfe096e79..12ca5564e80 100644 --- a/src/BridgeHelper/LibraryBridgeHelper.cpp +++ b/src/BridgeHelper/LibraryBridgeHelper.cpp @@ -1,5 +1,7 @@ #include "LibraryBridgeHelper.h" +#include +#include #include namespace DB diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d985595154c..dac20aaca48 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -419,9 +419,6 @@ dbms_target_link_libraries ( boost::circular_buffer boost::heap) -target_include_directories(clickhouse_common_io PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/Core/include") # uses some includes from core -dbms_target_include_directories(PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/Core/include") - target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::miniselect ch_contrib::pdqsort) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 5d472ba99b9..7af199131b6 100644 --- a/src/Client/ClientBase.cpp +++ 
b/src/Client/ClientBase.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -80,6 +81,11 @@ #include #include "config.h" +#if USE_GWP_ASAN +# include +#endif + + namespace fs = std::filesystem; using namespace std::literals; @@ -301,7 +307,13 @@ public: }; -ClientBase::~ClientBase() = default; +ClientBase::~ClientBase() +{ + writeSignalIDtoSignalPipe(SignalListener::StopThread); + signal_listener_thread.join(); + HandledSignals::instance().reset(); +} + ClientBase::ClientBase( int in_fd_, int out_fd_, @@ -2069,9 +2081,18 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin progress_indication.writeFinalProgress(); output_stream << std::endl << std::endl; } - else if (getClientConfiguration().getBool("print-time-to-stderr", false)) + else { - error_stream << progress_indication.elapsedSeconds() << "\n"; + const auto & config = getClientConfiguration(); + if (config.getBool("print-time-to-stderr", false)) + error_stream << progress_indication.elapsedSeconds() << "\n"; + + const auto & print_memory_mode = config.getString("print-memory-to-stderr", ""); + auto peak_memeory_usage = std::max(progress_indication.getMemoryUsage().peak, 0); + if (print_memory_mode == "default") + error_stream << peak_memeory_usage << "\n"; + else if (print_memory_mode == "readable") + error_stream << formatReadableSizeWithBinarySuffix(peak_memeory_usage) << "\n"; } if (!is_interactive && getClientConfiguration().getBool("print-num-processed-rows", false)) @@ -2565,12 +2586,12 @@ void ClientBase::runInteractive() word_break_characters, highlight_callback); #else + (void)word_break_characters; LineReader lr( history_file, getClientConfiguration().has("multiline"), query_extenders, - query_delimiters, - word_break_characters); + query_delimiters); #endif static const std::initializer_list> backslash_aliases = @@ -3035,6 +3056,7 @@ void ClientBase::init(int argc, char ** argv) ("disable_suggestion,A", "Disable loading suggestion data. Note that suggestion data is loaded asynchronously through a second connection to ClickHouse server. Also it is reasonable to disable suggestion if you want to paste a query with TAB characters. Shorthand option -A is for those who get used to mysql client.") ("wait_for_suggestions_to_load", "Load suggestion data synchonously.") ("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)") + ("memory-usage", po::value()->implicit_value("default")->default_value("none"), "print memory usage to stderr in non-interactive mode (for benchmarks). Values: 'none', 'default', 'readable'") ("echo", "in batch mode, print query before execution") @@ -3061,6 +3083,8 @@ void ClientBase::init(int argc, char ** argv) ("max_memory_usage_in_client", po::value(), "Set memory limit in client/local server") ("fuzzer-args", po::value(), "Command line arguments for the LLVM's libFuzzer driver. Only relevant if the application is compiled with libFuzzer.") + + ("client_logs_file", po::value(), "Path to a file for writing client logs. Currently we only have fatal logs (when the client crashes)") ; addOptions(options_description); @@ -3120,6 +3144,14 @@ void ClientBase::init(int argc, char ** argv) /// Output execution time to stderr in batch mode. 
if (options.count("time")) getClientConfiguration().setBool("print-time-to-stderr", true); + if (options.count("memory-usage")) + { + const auto & memory_usage_mode = options["memory-usage"].as(); + if (memory_usage_mode != "none" && memory_usage_mode != "default" && memory_usage_mode != "readable") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown memory-usage mode: {}", memory_usage_mode); + getClientConfiguration().setString("print-memory-to-stderr", memory_usage_mode); + } + if (options.count("query")) queries = options["query"].as>(); if (options.count("query_id")) @@ -3217,6 +3249,30 @@ void ClientBase::init(int argc, char ** argv) total_memory_tracker.setDescription("(total)"); total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); } + + /// Print stacktrace in case of crash + HandledSignals::instance().setupTerminateHandler(); + HandledSignals::instance().setupCommonDeadlySignalHandlers(); + /// We don't setup signal handlers for SIGINT, SIGQUIT, SIGTERM because we don't + /// have an option for client to shutdown gracefully. + + fatal_channel_ptr = new Poco::SplitterChannel; + fatal_console_channel_ptr = new Poco::ConsoleChannel; + fatal_channel_ptr->addChannel(fatal_console_channel_ptr); + if (options.count("client_logs_file")) + { + fatal_file_channel_ptr = new Poco::SimpleFileChannel(options["client_logs_file"].as()); + fatal_channel_ptr->addChannel(fatal_file_channel_ptr); + } + + fatal_log = createLogger("ClientBase", fatal_channel_ptr.get(), Poco::Message::PRIO_FATAL); + signal_listener = std::make_unique(nullptr, fatal_log); + signal_listener_thread.start(*signal_listener); + +#if USE_GWP_ASAN + GWPAsan::initFinished(); +#endif + } } diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index a260fd5761e..4f500a4c45d 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -10,7 +10,11 @@ #include #include #include +#include #include +#include +#include +#include #include #include #include @@ -211,6 +215,13 @@ protected: SharedContextHolder shared_context; ContextMutablePtr global_context; + LoggerPtr fatal_log; + Poco::AutoPtr fatal_channel_ptr; + Poco::AutoPtr fatal_console_channel_ptr; + Poco::AutoPtr fatal_file_channel_ptr; + Poco::Thread signal_listener_thread; + std::unique_ptr signal_listener; + bool is_interactive = false; /// Use either interactive line editing interface or batch mode. 
bool is_multiquery = false; bool delayed_interactive = false; diff --git a/src/Client/ConnectionEstablisher.cpp b/src/Client/ConnectionEstablisher.cpp index 05839b44452..4bab643ed31 100644 --- a/src/Client/ConnectionEstablisher.cpp +++ b/src/Client/ConnectionEstablisher.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace ProfileEvents { diff --git a/src/Client/ConnectionPool.cpp b/src/Client/ConnectionPool.cpp index 5cabb1465d1..ed2e7c3c725 100644 --- a/src/Client/ConnectionPool.cpp +++ b/src/Client/ConnectionPool.cpp @@ -1,4 +1,5 @@ #include +#include #include @@ -85,4 +86,15 @@ ConnectionPoolFactory & ConnectionPoolFactory::instance() return ret; } +IConnectionPool::Entry ConnectionPool::get(const DB::ConnectionTimeouts& timeouts, const DB::Settings& settings, + bool force_connected) +{ + Entry entry = Base::get(settings.connection_pool_max_wait_ms.totalMilliseconds()); + + if (force_connected) + entry->forceConnected(timeouts); + + return entry; +} + } diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index 725a5e91ac0..0fcb3c4e7e1 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -4,12 +4,13 @@ #include #include #include -#include #include namespace DB { +struct Settings; + /** Interface for connection pools. * * Usage (using the usual `ConnectionPool` example) @@ -102,15 +103,7 @@ public: Entry get(const ConnectionTimeouts & timeouts, /// NOLINT const Settings & settings, - bool force_connected = true) override - { - Entry entry = Base::get(settings.connection_pool_max_wait_ms.totalMilliseconds()); - - if (force_connected) - entry->forceConnected(timeouts); - - return entry; - } + bool force_connected) override; std::string getDescription() const { diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 4effc3adafa..dd8348ea04f 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include diff --git a/src/Client/HedgedConnectionsFactory.h b/src/Client/HedgedConnectionsFactory.h index 51da85ea178..63e39819f58 100644 --- a/src/Client/HedgedConnectionsFactory.h +++ b/src/Client/HedgedConnectionsFactory.h @@ -8,13 +8,14 @@ #include #include #include -#include #include #include namespace DB { +struct Settings; + /** Class for establishing hedged connections with replicas. 
* The process of establishing connection is divided on stages, on each stage if * replica doesn't respond for a long time, we start establishing connection with diff --git a/src/Client/IConnections.h b/src/Client/IConnections.h index ebc71511834..09211de53b0 100644 --- a/src/Client/IConnections.h +++ b/src/Client/IConnections.h @@ -54,8 +54,6 @@ public: struct ReplicaInfo { - bool collaborate_with_initiator{false}; - size_t all_replicas_count{0}; size_t number_of_current_replica{0}; }; diff --git a/src/Client/LineReader.h b/src/Client/LineReader.h index 0172bd7ec22..8c101401190 100644 --- a/src/Client/LineReader.h +++ b/src/Client/LineReader.h @@ -46,8 +46,7 @@ public: Patterns delimiters, std::istream & input_stream_ = std::cin, std::ostream & output_stream_ = std::cout, - int in_fd_ = STDIN_FILENO - ); + int in_fd_ = STDIN_FILENO); virtual ~LineReader() = default; diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index fb6fa1b55eb..b424c5b5aa3 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -175,4 +175,5 @@ private: ReadBuffer * in; }; + } diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index bcef286ecbc..244eccf1ed9 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -141,13 +142,12 @@ void MultiplexedConnections::sendQuery( modified_settings.group_by_two_level_threshold = 0; modified_settings.group_by_two_level_threshold_bytes = 0; } + } - if (replica_info) - { - client_info.collaborate_with_initiator = true; - client_info.count_participating_replicas = replica_info->all_replicas_count; - client_info.number_of_current_replica = replica_info->number_of_current_replica; - } + if (replica_info) + { + client_info.collaborate_with_initiator = true; + client_info.number_of_current_replica = replica_info->number_of_current_replica; } /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting. diff --git a/src/Client/ReplxxLineReader.cpp b/src/Client/ReplxxLineReader.cpp index 46600168695..3b3508d1a58 100644 --- a/src/Client/ReplxxLineReader.cpp +++ b/src/Client/ReplxxLineReader.cpp @@ -362,6 +362,9 @@ ReplxxLineReader::ReplxxLineReader( rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); }); rx.bind_key(Replxx::KEY::control('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_PREVIOUS, code); }); + /// We don't want the default, "suspend" behavior, it confuses people. + rx.bind_key_internal(replxx::Replxx::KEY::control('Z'), "insert_character"); + auto commit_action = [this](char32_t code) { /// If we allow multiline and there is already something in the input, start a newline. 
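The --memory-usage option added to ClientBase above accepts 'none', 'default', and 'readable', and the peak memory value is written to stderr once the query finishes in non-interactive mode. A minimal sketch of that selection logic under the same three modes (printPeakMemory and formatReadable are hypothetical helpers written for this example; the diff itself uses formatReadableSizeWithBinarySuffix for the readable form):

#include <cstdint>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>

// Stand-in for a human-readable size formatter (binary suffixes).
static std::string formatReadable(uint64_t bytes)
{
    static const char * units[] = {"B", "KiB", "MiB", "GiB", "TiB"};
    double value = static_cast<double>(bytes);
    size_t unit = 0;
    while (value >= 1024.0 && unit < 4)
    {
        value /= 1024.0;
        ++unit;
    }
    std::ostringstream out;
    out.precision(2);
    out << std::fixed << value << " " << units[unit];
    return out.str();
}

// Illustrative only: 'none' prints nothing, 'default' prints raw bytes,
// 'readable' prints a human-readable size, anything else is rejected,
// mirroring the validation added in ClientBase::init.
static void printPeakMemory(const std::string & mode, uint64_t peak_bytes, std::ostream & err)
{
    if (mode == "none")
        return;
    if (mode == "default")
        err << peak_bytes << "\n";
    else if (mode == "readable")
        err << formatReadable(peak_bytes) << "\n";
    else
        throw std::invalid_argument("Unknown memory-usage mode: " + mode);
}

int main()
{
    printPeakMemory("readable", 123456789, std::cerr);
    return 0;
}

In the patched client this corresponds to a non-interactive invocation along the lines of clickhouse-client --memory-usage=readable --query "SELECT 1", which prints the peak memory usage to stderr alongside the query result.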
diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index cfd07c27765..e26fe790a8e 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -267,7 +267,7 @@ bool ColumnAggregateFunction::structureEquals(const IColumn & to) const } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length) #else void ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length) @@ -366,13 +366,10 @@ void ColumnAggregateFunction::updateHashWithValue(size_t n, SipHash & hash) cons hash.update(wbuf.str().c_str(), wbuf.str().size()); } -void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnAggregateFunction::getWeakHash32() const { auto s = data.size(); - if (hash.getData().size() != data.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), hash.getData().size()); - + WeakHash32 hash(s); auto & hash_data = hash.getData(); std::vector v; @@ -383,6 +380,8 @@ void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const wbuf.finalize(); hash_data[i] = ::updateWeakHash32(v.data(), v.size(), hash_data[i]); } + + return hash; } void ColumnAggregateFunction::updateHashFast(SipHash & hash) const @@ -466,7 +465,7 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size insertMergeFrom(from, n); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n) #else void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n) diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 1be7a862438..b581c3ba3b4 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -145,7 +145,7 @@ public: void insertData(const char * pos, size_t length) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & from, size_t n) override; #else using IColumn::insertFrom; @@ -177,7 +177,7 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; @@ -189,7 +189,7 @@ public: void protect() override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & from, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & from, size_t start, size_t length) override; @@ -212,7 +212,7 @@ public: MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override #else int doCompareAt(size_t, size_t, const IColumn &, int) const override diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 5d7350f3a79..19cce678cc7 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -271,15 +271,12 @@ void ColumnArray::updateHashWithValue(size_t n, SipHash & hash) const getData().updateHashWithValue(offset + i, hash); } -void 
ColumnArray::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnArray::getWeakHash32() const { auto s = offsets->size(); - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", s, hash.getData().size()); + WeakHash32 hash(s); - WeakHash32 internal_hash(data->size()); - data->updateWeakHash32(internal_hash); + WeakHash32 internal_hash = data->getWeakHash32(); Offset prev_offset = 0; const auto & offsets_data = getOffsets(); @@ -300,6 +297,8 @@ void ColumnArray::updateWeakHash32(WeakHash32 & hash) const prev_offset = offsets_data[i]; } + + return hash; } void ColumnArray::updateHashFast(SipHash & hash) const @@ -337,7 +336,7 @@ bool ColumnArray::tryInsert(const Field & x) return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnArray::insertFrom(const IColumn & src_, size_t n) #else void ColumnArray::doInsertFrom(const IColumn & src_, size_t n) @@ -396,7 +395,7 @@ int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan : 1); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const #else int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const @@ -543,7 +542,7 @@ void ColumnArray::getExtremes(Field & min, Field & max) const } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 6cd3e2f6c3b..63affb86d9d 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -82,16 +82,16 @@ public: const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; #endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; #else void doInsertFrom(const IColumn & src_, size_t n) override; @@ -103,7 +103,7 @@ public: ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index 5e455709fec..c4270e8216b 100644 --- a/src/Columns/ColumnCompressed.h +++ 
b/src/Columns/ColumnCompressed.h @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -85,7 +86,7 @@ public: bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); } void insert(const Field &) override { throwMustBeDecompressed(); } bool tryInsert(const Field &) override { throwMustBeDecompressed(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } #else void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } @@ -98,13 +99,13 @@ public: const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); } const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); } - void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); } + WeakHash32 getWeakHash32() const override { throwMustBeDecompressed(); } void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); } ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); } void expand(const Filter &, bool) override { throwMustBeDecompressed(); } ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); } ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } #else int doCompareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } diff --git a/src/Columns/ColumnConst.cpp b/src/Columns/ColumnConst.cpp index f2cea83db0e..84427e7be2b 100644 --- a/src/Columns/ColumnConst.cpp +++ b/src/Columns/ColumnConst.cpp @@ -137,18 +137,10 @@ void ColumnConst::updatePermutation(PermutationSortDirection /*direction*/, Perm { } -void ColumnConst::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnConst::getWeakHash32() const { - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); - - WeakHash32 element_hash(1); - data->updateWeakHash32(element_hash); - size_t data_hash = element_hash.getData()[0]; - - for (auto & value : hash.getData()) - value = static_cast(intHashCRC32(data_hash, value)); + WeakHash32 element_hash = data->getWeakHash32(); + return WeakHash32(s, element_hash.getData()[0]); } void ColumnConst::compareColumn( diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index b55a1f42037..ca38e76ea57 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -123,7 +123,7 @@ public: return data->isNullAt(0); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override #else void doInsertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override @@ -151,7 +151,7 @@ public: ++s; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn &, size_t) override #else void doInsertFrom(const IColumn &, size_t) override @@ -160,7 +160,7 @@ public: ++s; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) 
void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } #else void doInsertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } @@ -204,7 +204,7 @@ public: data->updateHashWithValue(0, hash); } - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override { @@ -237,7 +237,7 @@ public: return data->allocatedBytes() + sizeof(s); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override #else int doCompareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index cf413f790a7..bb4433f8956 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -28,11 +28,10 @@ namespace ErrorCodes extern const int PARAMETER_OUT_OF_BOUND; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; } template -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnDecimal::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const #else int ColumnDecimal::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int) const @@ -76,13 +75,10 @@ void ColumnDecimal::updateHashWithValue(size_t n, SipHash & hash) const } template -void ColumnDecimal::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnDecimal::getWeakHash32() const { auto s = data.size(); - - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); + WeakHash32 hash(s); const T * begin = data.data(); const T * end = begin + s; @@ -94,6 +90,8 @@ void ColumnDecimal::updateWeakHash32(WeakHash32 & hash) const ++begin; ++hash_data; } + + return hash; } template @@ -335,7 +333,7 @@ void ColumnDecimal::insertData(const char * src, size_t /*length*/) } template -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnDecimal::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnDecimal::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 32efeb643a6..59bfbd2159c 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -55,13 +55,13 @@ public: void reserve(size_t n) override { data.reserve_exact(n); } void shrinkToFit() override { data.shrink_to_fit(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } #else void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } #endif -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & src, size_t position, size_t length) override #else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override @@ -76,7 +76,7 @@ public: void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); } void insert(const Field & x) override { data.push_back(x.get()); } bool tryInsert(const 
Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -102,9 +102,9 @@ public: const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index c735238f515..a92d54dd675 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -215,7 +215,7 @@ bool ColumnDynamic::tryInsert(const DB::Field & x) } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) #else void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) @@ -269,7 +269,7 @@ void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) #else void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) @@ -439,7 +439,7 @@ void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, si } } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) #else void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) @@ -603,7 +603,7 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const #else int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 9abddc7a26d..e92cabd3db9 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -143,7 +144,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; @@ -174,9 +175,9 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override + WeakHash32 
getWeakHash32() const override { - variant_column->updateWeakHash32(hash); + return variant_column->getWeakHash32(); } void updateHashFast(SipHash & hash) const override @@ -220,7 +221,7 @@ public: return scattered_columns; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 1c2de203a94..0bb3f7edb14 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -74,7 +74,7 @@ bool ColumnFixedString::tryInsert(const Field & x) return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) #else void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index) @@ -90,7 +90,7 @@ void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index) memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length) #else void ColumnFixedString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -137,14 +137,10 @@ void ColumnFixedString::updateHashWithValue(size_t index, SipHash & hash) const hash.update(reinterpret_cast(&chars[n * index]), n); } -void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnFixedString::getWeakHash32() const { auto s = size(); - - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, " - "hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); + WeakHash32 hash(s); const UInt8 * pos = chars.data(); UInt32 * hash_data = hash.getData().data(); @@ -156,6 +152,8 @@ void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const pos += n; ++hash_data; } + + return hash; } void ColumnFixedString::updateHashFast(SipHash & hash) const @@ -227,7 +225,7 @@ size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation & return elements.size(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnFixedString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 6e88136fc50..676ac7712ba 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -98,13 +98,13 @@ public: bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t index) override; #else void doInsertFrom(const IColumn & src_, size_t index) override; #endif -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; @@ -133,11 +133,11 @@ public: void updateHashWithValue(size_t index, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & 
hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override #else int doCompareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override @@ -156,7 +156,7 @@ public: size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp index fa57f35a823..fc81efaac0c 100644 --- a/src/Columns/ColumnFunction.cpp +++ b/src/Columns/ColumnFunction.cpp @@ -72,7 +72,7 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFunction::insertFrom(const IColumn & src, size_t n) #else void ColumnFunction::doInsertFrom(const IColumn & src, size_t n) @@ -93,7 +93,7 @@ void ColumnFunction::doInsertFrom(const IColumn & src, size_t n) ++elements_size; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnFunction::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index ba924c49a82..b62c6bf70eb 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -94,12 +95,12 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName()); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; #else void doInsertFrom(const IColumn & src, size_t n) override; #endif -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn &, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn &, size_t start, size_t length) override; @@ -130,9 +131,9 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateHashWithValue is not implemented for {}", getName()); } - void updateWeakHash32(WeakHash32 &) const override + WeakHash32 getWeakHash32() const override { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateWeakHash32 is not implemented for {}", getName()); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "getWeakHash32 is not implemented for {}", getName()); } void updateHashFast(SipHash &) const override @@ -145,7 +146,7 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "popBack is not implemented for {}", getName()); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override #else int doCompareAt(size_t, size_t, const IColumn &, int) const override diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index eb694a10b0f..a977046b07f 100644 --- 
a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -7,8 +7,7 @@ #include #include #include -#include "Storages/IndicesDescription.h" -#include "base/types.h" +#include #include #include @@ -159,7 +158,7 @@ void ColumnLowCardinality::insertDefault() idx.insertPosition(getDictionary().getDefaultValueIndex()); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n) #else void ColumnLowCardinality::doInsertFrom(const IColumn & src, size_t n) @@ -191,7 +190,7 @@ void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n) idx.insertPosition(getDictionary().uniqueInsertFrom(src, n)); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnLowCardinality::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -320,19 +319,10 @@ const char * ColumnLowCardinality::skipSerializedInArena(const char * pos) const return getDictionary().skipSerializedInArena(pos); } -void ColumnLowCardinality::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnLowCardinality::getWeakHash32() const { - auto s = size(); - - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); - - const auto & dict = getDictionary().getNestedColumn(); - WeakHash32 dict_hash(dict->size()); - dict->updateWeakHash32(dict_hash); - - idx.updateWeakHash(hash, dict_hash); + WeakHash32 dict_hash = getDictionary().getNestedColumn()->getWeakHash32(); + return idx.getWeakHash(dict_hash); } void ColumnLowCardinality::updateHashFast(SipHash & hash) const @@ -372,7 +362,7 @@ int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int ColumnLowCardinality::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const @@ -832,10 +822,11 @@ bool ColumnLowCardinality::Index::containsDefault() const return contains; } -void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const +WeakHash32 ColumnLowCardinality::Index::getWeakHash(const WeakHash32 & dict_hash) const { + WeakHash32 hash(positions->size()); auto & hash_data = hash.getData(); - auto & dict_hash_data = dict_hash.getData(); + const auto & dict_hash_data = dict_hash.getData(); auto update_weak_hash = [&](auto x) { @@ -844,10 +835,11 @@ void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 & auto size = data.size(); for (size_t i = 0; i < size; ++i) - hash_data[i] = static_cast(intHashCRC32(dict_hash_data[data[i]], hash_data[i])); + hash_data[i] = dict_hash_data[data[i]]; }; callForType(std::move(update_weak_hash), size_of_type); + return hash; } void ColumnLowCardinality::Index::collectSerializedValueSizes( diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index e99be07cd8d..3766b247d60 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -78,14 
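ColumnLowCardinality::getWeakHash32 above now hashes the dictionary once and lets Index::getWeakHash copy the per-key hashes out by position, instead of re-mixing them into the caller's hash row by row. A self-contained sketch of that fan-out; hashKey is a hypothetical per-value hash, whereas the real code takes the nested dictionary column's weak hash.

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

// Hypothetical per-value hash, used only for this sketch (FNV-1a).
static uint32_t hashKey(const std::string & s)
{
    uint32_t h = 2166136261u;
    for (unsigned char c : s) { h ^= c; h *= 16777619u; }
    return h;
}

// Hash each dictionary key once, then map every row's position to that hash.
std::vector<uint32_t> lowCardinalityWeakHash(
    const std::vector<std::string> & dictionary,
    const std::vector<size_t> & positions)
{
    std::vector<uint32_t> dict_hash(dictionary.size());
    for (size_t i = 0; i < dictionary.size(); ++i)
        dict_hash[i] = hashKey(dictionary[i]);

    std::vector<uint32_t> row_hash(positions.size());
    for (size_t i = 0; i < positions.size(); ++i)
        row_hash[i] = dict_hash[positions[i]];   // O(rows) lookups, O(keys) hashing
    return row_hash;
}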
+78,14 @@ public: bool tryInsert(const Field & x) override; void insertDefault() override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; #else void doInsertFrom(const IColumn & src, size_t n) override; #endif void insertFromFullColumn(const IColumn & src, size_t n); -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -111,7 +111,7 @@ public: getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash); } - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash &) const override; @@ -135,7 +135,7 @@ public: return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit)); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; @@ -325,7 +325,7 @@ public: bool containsDefault() const; - void updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const; + WeakHash32 getWeakHash(const WeakHash32 & dict_hash) const; void collectSerializedValueSizes(PaddedPODArray & sizes, const PaddedPODArray & dict_sizes) const; diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 2dffddb2dc9..1025b4e77b9 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -143,9 +143,9 @@ void ColumnMap::updateHashWithValue(size_t n, SipHash & hash) const nested->updateHashWithValue(n, hash); } -void ColumnMap::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnMap::getWeakHash32() const { - nested->updateWeakHash32(hash); + return nested->getWeakHash32(); } void ColumnMap::updateHashFast(SipHash & hash) const @@ -153,7 +153,7 @@ void ColumnMap::updateHashFast(SipHash & hash) const nested->updateHashFast(hash); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnMap::insertFrom(const IColumn & src, size_t n) #else void ColumnMap::doInsertFrom(const IColumn & src, size_t n) @@ -162,7 +162,7 @@ void ColumnMap::doInsertFrom(const IColumn & src, size_t n) nested->insertFrom(assert_cast(src).getNestedColumn(), n); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length) #else void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -171,7 +171,7 @@ void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t le assert_cast(*nested).insertManyFrom(assert_cast(src).getNestedColumn(), position, length); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnMap::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -222,7 +222,7 @@ MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector & sele return res; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int 
ColumnMap::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index a54071a2974..3eaaa0ad562 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -64,10 +64,10 @@ public: const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -83,7 +83,7 @@ public: ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index f060e74b315..6529f0b78db 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -56,25 +56,21 @@ void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const getNestedColumn().updateHashWithValue(n, hash); } -void ColumnNullable::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnNullable::getWeakHash32() const { auto s = size(); - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); - - WeakHash32 old_hash = hash; - nested_column->updateWeakHash32(hash); + WeakHash32 hash = nested_column->getWeakHash32(); const auto & null_map_data = getNullMapData(); auto & hash_data = hash.getData(); - auto & old_hash_data = old_hash.getData(); - /// Use old data for nulls. + /// Use default for nulls. 
for (size_t row = 0; row < s; ++row) if (null_map_data[row]) - hash_data[row] = old_hash_data[row]; + hash_data[row] = WeakHash32::kDefaultInitialValue; + + return hash; } void ColumnNullable::updateHashFast(SipHash & hash) const @@ -221,7 +217,7 @@ const char * ColumnNullable::skipSerializedInArena(const char * pos) const return pos; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnNullable::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -262,7 +258,7 @@ bool ColumnNullable::tryInsert(const Field & x) return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnNullable::insertFrom(const IColumn & src, size_t n) #else void ColumnNullable::doInsertFrom(const IColumn & src, size_t n) @@ -274,7 +270,7 @@ void ColumnNullable::doInsertFrom(const IColumn & src, size_t n) } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length) #else void ColumnNullable::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -414,7 +410,7 @@ int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const #else int ColumnNullable::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index a6d0483e527..fe9f5b6dcc2 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -69,7 +69,7 @@ public: char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -77,7 +77,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else @@ -100,7 +100,7 @@ public: void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; @@ -133,7 +133,7 @@ public: void protect() override; ColumnPtr replicate(const Offsets & replicate_offsets) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) 
const override; void getExtremes(Field & min, Field & max) const override; // Special function for nullable minmax index diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index adcd42b16e9..a6431007cb6 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -763,7 +763,7 @@ void ColumnObject::get(size_t n, Field & res) const } } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnObject::insertFrom(const IColumn & src, size_t n) #else void ColumnObject::doInsertFrom(const IColumn & src, size_t n) @@ -772,7 +772,7 @@ void ColumnObject::doInsertFrom(const IColumn & src, size_t n) insert(src[n]); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -1101,4 +1101,10 @@ void ColumnObject::finalize() checkObjectHasNoAmbiguosPaths(getKeys()); } +void ColumnObject::updateHashFast(SipHash & hash) const +{ + for (const auto & entry : subcolumns) + for (auto & part : entry->data.data) + part->updateHashFast(hash); +} } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index fadf2e18779..25cfaa550f6 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -210,7 +211,7 @@ public: bool tryInsert(const Field & field) override; void insertDefault() override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else @@ -235,7 +236,7 @@ public: /// Order of rows in ColumnObject is undefined. 
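For the ColumnNullable::getWeakHash32 change a few hunks up: the old code kept the caller's previous hash values for NULL rows, while the new code starts from the nested column's hash and resets every NULL row to WeakHash32's default initial value, so all NULLs hash identically and no longer depend on caller state. A standalone sketch of that behaviour; the 0x811c9dc5 constant is only an illustrative placeholder, not the real kDefaultInitialValue.

#include <cstddef>
#include <cstdint>
#include <vector>

// Placeholder for WeakHash32::kDefaultInitialValue (the real constant lives in WeakHash32).
constexpr uint32_t kDefaultInitialValue = 0x811c9dc5u;

// Nullable weak hash: take the nested column's per-row hash and overwrite every NULL row.
std::vector<uint32_t> nullableWeakHash(
    const std::vector<uint32_t> & nested_hash,
    const std::vector<uint8_t> & null_map)      // 1 = NULL, 0 = value present
{
    std::vector<uint32_t> hash = nested_hash;
    for (size_t row = 0; row < hash.size(); ++row)
        if (null_map[row])
            hash[row] = kDefaultInitialValue;
    return hash;
}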
void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override; void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {} -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } #else int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } @@ -252,8 +253,8 @@ public: const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); } const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } - void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); } - void updateHashFast(SipHash &) const override { throwMustBeConcrete(); } + WeakHash32 getWeakHash32() const override { throwMustBeConcrete(); } + void updateHashFast(SipHash & hash) const override; void expand(const Filter &, bool) override { throwMustBeConcrete(); } bool hasEqualValues() const override { throwMustBeConcrete(); } size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); } diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 809586d8810..a908d970a15 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -174,7 +174,7 @@ const char * ColumnSparse::skipSerializedInArena(const char * pos) const return values->skipSerializedInArena(pos); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnSparse::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -252,7 +252,7 @@ bool ColumnSparse::tryInsert(const Field & x) return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnSparse::insertFrom(const IColumn & src, size_t n) #else void ColumnSparse::doInsertFrom(const IColumn & src, size_t n) @@ -454,7 +454,7 @@ ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray & indexes, size_t l return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const #else int ColumnSparse::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const @@ -678,20 +678,22 @@ void ColumnSparse::updateHashWithValue(size_t n, SipHash & hash) const values->updateHashWithValue(getValueIndex(n), hash); } -void ColumnSparse::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnSparse::getWeakHash32() const { - if (hash.getData().size() != _size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", _size, hash.getData().size()); + WeakHash32 values_hash = values->getWeakHash32(); + WeakHash32 hash(size()); + + auto & hash_data = hash.getData(); + auto & values_hash_data = values_hash.getData(); auto offset_it = begin(); - auto & hash_data = hash.getData(); for (size_t i = 0; i < _size; ++i, ++offset_it) { size_t value_index = offset_it.getValueIndex(); - auto data_ref = values->getDataAt(value_index); - hash_data[i] = ::updateWeakHash32(reinterpret_cast(data_ref.data), data_ref.size, 
hash_data[i]); + hash_data[i] = values_hash_data[value_index]; } + + return hash; } void ColumnSparse::updateHashFast(SipHash & hash) const diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 3e34d1de94a..7a4d914e62a 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -81,14 +81,14 @@ public: char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char *) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; #endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; #else void doInsertFrom(const IColumn & src, size_t n) override; @@ -106,7 +106,7 @@ public: template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; @@ -139,7 +139,7 @@ public: void protect() override; ColumnPtr replicate(const Offsets & replicate_offsets) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; void getExtremes(Field & min, Field & max) const override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 1eda9714d62..7cfa2571f5a 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -39,7 +39,7 @@ ColumnString::ColumnString(const ColumnString & src) last_offset, chars.size()); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length) #else void ColumnString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -108,13 +108,10 @@ MutableColumnPtr ColumnString::cloneResized(size_t to_size) const return res; } -void ColumnString::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnString::getWeakHash32() const { auto s = offsets.size(); - - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); + WeakHash32 hash(s); const UInt8 * pos = chars.data(); UInt32 * hash_data = hash.getData().data(); @@ -130,10 +127,12 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const prev_offset = offset; ++hash_data; } + + return hash; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 602ffac65e8..c1012e1e55e 100644 --- a/src/Columns/ColumnString.h +++ 
b/src/Columns/ColumnString.h @@ -142,7 +142,7 @@ public: return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override #else void doInsertFrom(const IColumn & src_, size_t n) override @@ -169,7 +169,7 @@ public: } } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; @@ -212,7 +212,7 @@ public: hash.update(reinterpret_cast(&chars[offset]), string_size); } - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override { @@ -220,7 +220,7 @@ public: hash.update(reinterpret_cast(chars.data()), chars.size() * sizeof(chars[0])); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -250,7 +250,7 @@ public: offsets.push_back(offsets.back() + 1); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 9b822d7f570..4fc3f88a87c 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -201,11 +201,12 @@ bool ColumnTuple::tryInsert(const Field & x) return false; } } + ++column_length; return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnTuple::insertFrom(const IColumn & src_, size_t n) #else void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n) @@ -222,7 +223,7 @@ void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n) columns[i]->insertFrom(*src.columns[i], n); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length) #else void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -236,6 +237,7 @@ void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t for (size_t i = 0; i < tuple_size; ++i) columns[i]->insertManyFrom(*src_tuple.columns[i], position, length); + column_length += length; } void ColumnTuple::insertDefault() @@ -308,16 +310,15 @@ void ColumnTuple::updateHashWithValue(size_t n, SipHash & hash) const column->updateHashWithValue(n, hash); } -void ColumnTuple::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnTuple::getWeakHash32() const { auto s = size(); - - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); + WeakHash32 hash(s); for (const auto & column : columns) - column->updateWeakHash32(hash); + hash.update(column->getWeakHash32()); + + return hash; } void ColumnTuple::updateHashFast(SipHash & hash) const @@ -326,7 +327,7 @@ void ColumnTuple::updateHashFast(SipHash & hash) const column->updateHashFast(hash); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if 
!defined(DEBUG_OR_SANITIZER_BUILD) void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnTuple::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -482,7 +483,7 @@ int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_ return 0; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int ColumnTuple::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 38e479791d4..16b47a993f6 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -66,7 +66,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else @@ -81,9 +81,9 @@ public: const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -94,7 +94,7 @@ public: ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index ec1f8e0a4d5..d6cb75679be 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -90,7 +90,7 @@ public: return getNestedColumn()->updateHashWithValue(n, hash_func); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; @@ -492,7 +492,7 @@ const char * ColumnUnique::skipSerializedInArena(const char *) const } template -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnUnique::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int ColumnUnique::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index ee5de4c2dde..de7efb41d19 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -595,7 +595,7 @@ void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position } } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if 
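ColumnTuple::getWeakHash32 above builds a fresh hash of the column's size and folds every element column's hash into it with WeakHash32::update. A standalone sketch of that fold; combine() is a hypothetical row-wise mixer standing in for whatever WeakHash32::update does internally.

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical row-wise mixer, for illustration only.
static uint32_t combine(uint32_t a, uint32_t b) { return (a * 1000003u) ^ b; }

// Tuple weak hash: start from a fresh hash, then fold each element column's hash in.
std::vector<uint32_t> tupleWeakHash(size_t num_rows,
                                    const std::vector<std::vector<uint32_t>> & element_hashes)
{
    std::vector<uint32_t> hash(num_rows, 0);
    for (const auto & element_hash : element_hashes)
        for (size_t row = 0; row < num_rows; ++row)
            hash[row] = combine(hash[row], element_hash[row]);
    return hash;
}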
!defined(DEBUG_OR_SANITIZER_BUILD) void ColumnVariant::insertFrom(const IColumn & src_, size_t n) #else void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n) @@ -604,7 +604,7 @@ void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n) insertFromImpl(src_, n, nullptr); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) #else void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) @@ -613,7 +613,7 @@ void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t insertRangeFromImpl(src_, start, length, nullptr); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) #else void ColumnVariant::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) @@ -789,36 +789,26 @@ void ColumnVariant::updateHashWithValue(size_t n, SipHash & hash) const variants[localDiscriminatorByGlobal(global_discr)]->updateHashWithValue(offsetAt(n), hash); } -void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnVariant::getWeakHash32() const { auto s = size(); - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); - /// If we have only NULLs, keep hash unchanged. if (hasOnlyNulls()) - return; + return WeakHash32(s); /// Optimization for case when there is only 1 non-empty variant and no NULLs. /// In this case we can just calculate weak hash for this variant. if (auto non_empty_local_discr = getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) - { - variants[*non_empty_local_discr]->updateWeakHash32(hash); - return; - } + return variants[*non_empty_local_discr]->getWeakHash32(); /// Calculate weak hash for all variants. std::vector nested_hashes; for (const auto & variant : variants) - { - WeakHash32 nested_hash(variant->size()); - variant->updateWeakHash32(nested_hash); - nested_hashes.emplace_back(std::move(nested_hash)); - } + nested_hashes.emplace_back(variant->getWeakHash32()); /// For each row hash is a hash of corresponding row from corresponding variant. 
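The loop below picks, for every non-NULL row, the hash of the row's value out of the hash of the variant that stores it; with this rewrite the value is taken directly instead of being mixed into the caller's hash. A standalone sketch of that selection; NULL_DISCRIMINATOR and the default value are illustrative placeholders.

#include <cstddef>
#include <cstdint>
#include <vector>

constexpr uint8_t NULL_DISCRIMINATOR = 255;    // assumed sentinel, for illustration only

// Variant weak hash: hash each variant column once, then pick per row the hash of the
// value it actually stores; NULL rows keep the default value.
std::vector<uint32_t> variantWeakHash(
    const std::vector<std::vector<uint32_t>> & variant_hashes, // one hash vector per variant
    const std::vector<uint8_t> & discriminators,               // which variant each row uses
    const std::vector<size_t> & offsets,                       // row's position inside its variant
    uint32_t default_value)
{
    std::vector<uint32_t> hash(discriminators.size(), default_value);
    for (size_t row = 0; row < discriminators.size(); ++row)
        if (discriminators[row] != NULL_DISCRIMINATOR)
            hash[row] = variant_hashes[discriminators[row]][offsets[row]];
    return hash;
}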
+ WeakHash32 hash(s); auto & hash_data = hash.getData(); const auto & local_discriminators_data = getLocalDiscriminators(); const auto & offsets_data = getOffsets(); @@ -827,11 +817,10 @@ void ColumnVariant::updateWeakHash32(WeakHash32 & hash) const Discriminator discr = local_discriminators_data[i]; /// Update hash only for non-NULL values if (discr != NULL_DISCRIMINATOR) - { - auto nested_hash = nested_hashes[local_discriminators_data[i]].getData()[offsets_data[i]]; - hash_data[i] = static_cast(hashCRC32(nested_hash, hash_data[i])); - } + hash_data[i] = nested_hashes[discr].getData()[offsets_data[i]]; } + + return hash; } void ColumnVariant::updateHashFast(SipHash & hash) const @@ -1186,7 +1175,7 @@ bool ColumnVariant::hasEqualValues() const return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int ColumnVariant::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index d91b8e93a7d..34c24b5428d 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -180,7 +180,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override; void insertManyFrom(const IColumn & src_, size_t position, size_t length) override; @@ -213,7 +213,7 @@ public: const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos); const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; @@ -223,7 +223,7 @@ public: ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; ColumnPtr replicate(const Offsets & replicate_offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 19d1b800961..c474efe35bd 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -73,13 +73,10 @@ void ColumnVector::updateHashWithValue(size_t n, SipHash & hash) const } template -void ColumnVector::updateWeakHash32(WeakHash32 & hash) const +WeakHash32 ColumnVector::getWeakHash32() const { auto s = data.size(); - - if (hash.getData().size() != s) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " - "column size is {}, hash size is {}", std::to_string(s), std::to_string(hash.getData().size())); + WeakHash32 hash(s); const T * begin = data.data(); const T * end = begin + 
s; @@ -91,6 +88,8 @@ void ColumnVector::updateWeakHash32(WeakHash32 & hash) const ++begin; ++hash_data; } + + return hash; } template @@ -503,7 +502,7 @@ bool ColumnVector::tryInsert(const DB::Field & x) } template -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnVector::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnVector::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 3a0acf5e312..2fe5b635bd2 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -64,7 +64,7 @@ public: return data.size(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override #else void doInsertFrom(const IColumn & src, size_t n) override @@ -73,7 +73,7 @@ public: data.push_back(assert_cast(src).getData()[n]); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & src, size_t position, size_t length) override #else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override @@ -114,7 +114,7 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; - void updateWeakHash32(WeakHash32 & hash) const override; + WeakHash32 getWeakHash32() const override; void updateHashFast(SipHash & hash) const override; @@ -150,7 +150,7 @@ public: } /// This method implemented in header because it could be possibly devirtualized. -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override @@ -240,7 +240,7 @@ public: bool tryInsert(const DB::Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 552e52cf51c..a189903b617 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -46,7 +46,7 @@ String IColumn::dumpStructure() const return res.str(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void IColumn::insertFrom(const IColumn & src, size_t n) #else void IColumn::doInsertFrom(const IColumn & src, size_t n) diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 4b6f34e5aa2..f9c1a3e7034 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -179,7 +179,7 @@ public: /// Appends n-th element from other column with the same type. /// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation. -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) virtual void insertFrom(const IColumn & src, size_t n); #else void insertFrom(const IColumn & src, size_t n) @@ -191,7 +191,7 @@ public: /// Appends range of elements from other column with the same type. /// Could be used to concatenate columns. 
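The #if pairs that this diff renames from ABORT_ON_LOGICAL_ERROR to DEBUG_OR_SANITIZER_BUILD, including the one that follows, all implement the same idea: in release builds the public method is virtual and dispatches directly, while in debug and sanitizer builds the public method is a non-virtual wrapper that can run extra checks before forwarding to a virtual do-method. A reduced stand-in, not the real IColumn; the body of the checking wrapper is assumed.

#include <cstddef>

struct ColumnBase
{
#if !defined(DEBUG_OR_SANITIZER_BUILD)
    // Release: one virtual call, no extra indirection.
    virtual void insertFrom(const ColumnBase & src, size_t n) = 0;
#else
    // Debug/sanitizer: non-virtual entry point where structural checks can live.
    void insertFrom(const ColumnBase & src, size_t n)
    {
        // ... additional consistency checks would run here ...
        doInsertFrom(src, n);
    }
    virtual void doInsertFrom(const ColumnBase & src, size_t n) = 0;
#endif
    virtual ~ColumnBase() = default;
};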
-#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; #else void insertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -202,7 +202,7 @@ public: #endif /// Appends one element from other column with the same type multiple times. -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) virtual void insertManyFrom(const IColumn & src, size_t position, size_t length) { for (size_t i = 0; i < length; ++i) @@ -300,10 +300,10 @@ public: /// passed bytes to hash must identify sequence of values unambiguously. virtual void updateHashWithValue(size_t n, SipHash & hash) const = 0; - /// Update hash function value. Hash is calculated for each element. + /// Get hash function value. Hash is calculated for each element. /// It's a fast weak hash function. Mainly need to scatter data between threads. /// WeakHash32 must have the same size as column. - virtual void updateWeakHash32(WeakHash32 & hash) const = 0; + virtual WeakHash32 getWeakHash32() const = 0; /// Update state of hash with all column. virtual void updateHashFast(SipHash & hash) const = 0; @@ -345,7 +345,7 @@ public: * * For non Nullable and non floating point types, nan_direction_hint is ignored. */ -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) [[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; #else [[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const @@ -667,7 +667,7 @@ protected: Sort full_sort, PartialSort partial_sort) const; -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) virtual void doInsertFrom(const IColumn & src, size_t n); virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index c19fb704d9b..40d410e207d 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -26,7 +27,7 @@ public: size_t byteSize() const override { return 0; } size_t byteSizeAt(size_t) const override { return 0; } size_t allocatedBytes() const override { return 0; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } #else int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } @@ -63,15 +64,16 @@ public: { } - void updateWeakHash32(WeakHash32 & /*hash*/) const override + WeakHash32 getWeakHash32() const override { + return WeakHash32(s); } void updateHashFast(SipHash & /*hash*/) const override { } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn &, size_t) override #else void doInsertFrom(const IColumn &, size_t) override @@ -80,7 +82,7 @@ public: ++s; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override #else void doInsertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index 3398452b7ee..a8e10e5e2b2 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include namespace DB { @@ -85,7 +86,7 @@ public: 
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryInsert is not supported for ColumnUnique."); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn &, size_t, size_t) override #else void doInsertRangeFrom(const IColumn &, size_t, size_t) override @@ -166,9 +167,9 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method scatter is not supported for ColumnUnique."); } - void updateWeakHash32(WeakHash32 &) const override + WeakHash32 getWeakHash32() const override { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method updateWeakHash32 is not supported for ColumnUnique."); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getWeakHash32 is not supported for ColumnUnique."); } void updateHashFast(SipHash &) const override diff --git a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp index 645f6ed79f3..240099f0ae5 100644 --- a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp +++ b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp @@ -52,7 +52,7 @@ static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows) } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src) #else static NO_INLINE void doInsertManyFrom(IColumn & dst, const IColumn & src) diff --git a/src/Columns/tests/gtest_weak_hash_32.cpp b/src/Columns/tests/gtest_weak_hash_32.cpp index 2c95998761b..3143d0ff83c 100644 --- a/src/Columns/tests/gtest_weak_hash_32.cpp +++ b/src/Columns/tests/gtest_weak_hash_32.cpp @@ -60,8 +60,7 @@ TEST(WeakHash32, ColumnVectorU8) data.push_back(i); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -77,8 +76,7 @@ TEST(WeakHash32, ColumnVectorI8) data.push_back(i); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -94,8 +92,7 @@ TEST(WeakHash32, ColumnVectorU16) data.push_back(i); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -111,8 +108,7 @@ TEST(WeakHash32, ColumnVectorI16) data.push_back(i); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -128,8 +124,7 @@ TEST(WeakHash32, ColumnVectorU32) data.push_back(i << 16u); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -145,8 +140,7 @@ TEST(WeakHash32, ColumnVectorI32) data.push_back(i << 16); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -162,8 +156,7 @@ TEST(WeakHash32, ColumnVectorU64) data.push_back(i << 32u); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -179,8 +172,7 @@ TEST(WeakHash32, ColumnVectorI64) data.push_back(i << 32); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -204,8 +196,7 @@ TEST(WeakHash32, ColumnVectorU128) 
} } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), eq_data); } @@ -221,8 +212,7 @@ TEST(WeakHash32, ColumnVectorI128) data.push_back(i << 32); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -238,8 +228,7 @@ TEST(WeakHash32, ColumnDecimal32) data.push_back(i << 16); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -255,8 +244,7 @@ TEST(WeakHash32, ColumnDecimal64) data.push_back(i << 32); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -272,8 +260,7 @@ TEST(WeakHash32, ColumnDecimal128) data.push_back(i << 32); } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), col->getData()); } @@ -294,8 +281,7 @@ TEST(WeakHash32, ColumnString1) } } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), data); } @@ -331,8 +317,7 @@ TEST(WeakHash32, ColumnString2) } } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), data); } @@ -369,8 +354,7 @@ TEST(WeakHash32, ColumnString3) } } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), data); } @@ -397,8 +381,7 @@ TEST(WeakHash32, ColumnFixedString) } } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), data); } @@ -444,8 +427,7 @@ TEST(WeakHash32, ColumnArray) auto col_arr = ColumnArray::create(std::move(val), std::move(off)); - WeakHash32 hash(col_arr->size()); - col_arr->updateWeakHash32(hash); + WeakHash32 hash = col_arr->getWeakHash32(); checkColumn(hash.getData(), eq_data); } @@ -479,8 +461,7 @@ TEST(WeakHash32, ColumnArray2) auto col_arr = ColumnArray::create(std::move(val), std::move(off)); - WeakHash32 hash(col_arr->size()); - col_arr->updateWeakHash32(hash); + WeakHash32 hash = col_arr->getWeakHash32(); checkColumn(hash.getData(), eq_data); } @@ -536,8 +517,7 @@ TEST(WeakHash32, ColumnArrayArray) auto col_arr = ColumnArray::create(std::move(val), std::move(off)); auto col_arr_arr = ColumnArray::create(std::move(col_arr), std::move(off2)); - WeakHash32 hash(col_arr_arr->size()); - col_arr_arr->updateWeakHash32(hash); + WeakHash32 hash = col_arr_arr->getWeakHash32(); checkColumn(hash.getData(), eq_data); } @@ -555,8 +535,7 @@ TEST(WeakHash32, ColumnConst) auto col_const = ColumnConst::create(std::move(inner_col), 256); - WeakHash32 hash(col_const->size()); - col_const->updateWeakHash32(hash); + WeakHash32 hash = col_const->getWeakHash32(); checkColumn(hash.getData(), data); } @@ -576,8 +555,7 @@ TEST(WeakHash32, ColumnLowcardinality) } } - WeakHash32 hash(col->size()); - col->updateWeakHash32(hash); + WeakHash32 hash = col->getWeakHash32(); checkColumn(hash.getData(), data); } @@ -602,8 +580,7 @@ TEST(WeakHash32, ColumnNullable) auto col_null = ColumnNullable::create(std::move(col), std::move(mask)); - WeakHash32 hash(col_null->size()); - col_null->updateWeakHash32(hash); + WeakHash32 hash = col_null->getWeakHash32(); checkColumn(hash.getData(), 
eq); } @@ -633,8 +610,7 @@ TEST(WeakHash32, ColumnTupleUInt64UInt64) columns.emplace_back(std::move(col2)); auto col_tuple = ColumnTuple::create(std::move(columns)); - WeakHash32 hash(col_tuple->size()); - col_tuple->updateWeakHash32(hash); + WeakHash32 hash = col_tuple->getWeakHash32(); checkColumn(hash.getData(), eq); } @@ -671,8 +647,7 @@ TEST(WeakHash32, ColumnTupleUInt64String) columns.emplace_back(std::move(col2)); auto col_tuple = ColumnTuple::create(std::move(columns)); - WeakHash32 hash(col_tuple->size()); - col_tuple->updateWeakHash32(hash); + WeakHash32 hash = col_tuple->getWeakHash32(); checkColumn(hash.getData(), eq); } @@ -709,8 +684,7 @@ TEST(WeakHash32, ColumnTupleUInt64FixedString) columns.emplace_back(std::move(col2)); auto col_tuple = ColumnTuple::create(std::move(columns)); - WeakHash32 hash(col_tuple->size()); - col_tuple->updateWeakHash32(hash); + WeakHash32 hash = col_tuple->getWeakHash32(); checkColumn(hash.getData(), eq); } @@ -756,8 +730,7 @@ TEST(WeakHash32, ColumnTupleUInt64Array) columns.emplace_back(ColumnArray::create(std::move(val), std::move(off))); auto col_tuple = ColumnTuple::create(std::move(columns)); - WeakHash32 hash(col_tuple->size()); - col_tuple->updateWeakHash32(hash); + WeakHash32 hash = col_tuple->getWeakHash32(); checkColumn(hash.getData(), eq_data); } diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp index bfc85559fe8..1b43e746c69 100644 --- a/src/Common/Allocator.cpp +++ b/src/Common/Allocator.cpp @@ -68,7 +68,7 @@ void * allocNoTrack(size_t size, size_t alignment) { void * buf; #if USE_GWP_ASAN - if (unlikely(GWPAsan::GuardedAlloc.shouldSample())) + if (unlikely(GWPAsan::shouldSample())) { if (void * ptr = GWPAsan::GuardedAlloc.allocate(size, alignment)) { @@ -185,14 +185,11 @@ void * Allocator::realloc(void * buf, size_t old_size, } #if USE_GWP_ASAN - if (unlikely(GWPAsan::GuardedAlloc.shouldSample())) + if (unlikely(GWPAsan::shouldSample())) { + auto trace_alloc = CurrentMemoryTracker::alloc(new_size); if (void * ptr = GWPAsan::GuardedAlloc.allocate(new_size, alignment)) { - auto trace_free = CurrentMemoryTracker::free(old_size); - auto trace_alloc = CurrentMemoryTracker::alloc(new_size); - trace_free.onFree(buf, old_size); - memcpy(ptr, buf, std::min(old_size, new_size)); free(buf, old_size); trace_alloc.onAlloc(buf, new_size); @@ -209,6 +206,7 @@ void * Allocator::realloc(void * buf, size_t old_size, } else { + [[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(new_size); ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed); } } @@ -231,13 +229,17 @@ void * Allocator::realloc(void * buf, size_t old_size, if (alignment <= MALLOC_MIN_ALIGNMENT) { /// Resize malloc'd memory region with no special alignment requirement. 
- auto trace_free = CurrentMemoryTracker::free(old_size); + /// Realloc can do 2 possible things: + /// - expand existing memory region + /// - allocate new memory block and free the old one + /// Because we don't know which option will be picked we need to make sure there is enough + /// memory for all options auto trace_alloc = CurrentMemoryTracker::alloc(new_size); - trace_free.onFree(buf, old_size); void * new_buf = ::realloc(buf, new_size); if (nullptr == new_buf) { + [[maybe_unused]] auto trace_free = CurrentMemoryTracker::free(new_size); throw DB::ErrnoException( DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot realloc from {} to {}", @@ -246,6 +248,8 @@ void * Allocator::realloc(void * buf, size_t old_size, } buf = new_buf; + auto trace_free = CurrentMemoryTracker::free(old_size); + trace_free.onFree(buf, old_size); trace_alloc.onAlloc(buf, new_size); if constexpr (clear_memory) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index cfb273b9058..6264eb03106 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -49,6 +49,7 @@ void logAboutProgress(LoggerPtr log, size_t processed, size_t total, AtomicStopw AsyncLoader::Pool::Pool(const AsyncLoader::PoolInitializer & init) : name(init.name) , priority(init.priority) + , max_threads(init.max_threads > 0 ? init.max_threads : getNumberOfPhysicalCPUCores()) , thread_pool(std::make_unique( init.metric_threads, init.metric_active_threads, @@ -56,17 +57,16 @@ AsyncLoader::Pool::Pool(const AsyncLoader::PoolInitializer & init) /* max_threads = */ std::numeric_limits::max(), // Unlimited number of threads, we do worker management ourselves /* max_free_threads = */ 0, // We do not require free threads /* queue_size = */0)) // Unlimited queue to avoid blocking during worker spawning - , max_threads(init.max_threads > 0 ? init.max_threads : getNumberOfPhysicalCPUCores()) {} AsyncLoader::Pool::Pool(Pool&& o) noexcept : name(o.name) , priority(o.priority) - , thread_pool(std::move(o.thread_pool)) , ready_queue(std::move(o.ready_queue)) , max_threads(o.max_threads) , workers(o.workers) , suspended_workers(o.suspended_workers.load()) // All these constructors are needed because std::atomic is neither copy-constructible, nor move-constructible. We never move pools after init, so it is safe. + , thread_pool(std::move(o.thread_pool)) {} void cancelOnDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 42707a4ee91..05b809aceae 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -365,11 +365,11 @@ private: { const String name; const Priority priority; - std::unique_ptr thread_pool; // NOTE: we avoid using a `ThreadPool` queue to be able to move jobs between pools. std::map ready_queue; // FIFO queue of jobs to be executed in this pool. Map is used for faster erasing. Key is `ready_seqno` size_t max_threads; // Max number of workers to be spawn size_t workers = 0; // Number of currently executing workers std::atomic suspended_workers{0}; // Number of workers that are blocked by `wait()` call on a job executing in the same pool (for deadlock resolution) + std::unique_ptr thread_pool; // NOTE: we avoid using a `ThreadPool` queue to be able to move jobs between pools. 
explicit Pool(const PoolInitializer & init); Pool(Pool&& o) noexcept; diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index d36c7fd08aa..02bde0d80b7 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -25,6 +25,7 @@ #endif using namespace DB; +namespace fs = std::filesystem; namespace DB { @@ -69,7 +70,7 @@ uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & ke struct CgroupsV1Reader : ICgroupsReader { - explicit CgroupsV1Reader(const std::filesystem::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } + explicit CgroupsV1Reader(const fs::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } uint64_t readMemoryUsage() override { @@ -85,7 +86,7 @@ private: struct CgroupsV2Reader : ICgroupsReader { - explicit CgroupsV2Reader(const std::filesystem::path & stat_file_dir) + explicit CgroupsV2Reader(const fs::path & stat_file_dir) : current_buf(stat_file_dir / "memory.current"), stat_buf(stat_file_dir / "memory.stat") { } @@ -129,8 +130,9 @@ std::optional getCgroupsV2Path() if (!cgroupsV2MemoryControllerEnabled()) return {}; - String cgroup = cgroupV2OfProcess(); - auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup); + fs::path current_cgroup = cgroupV2PathOfProcess(); + if (current_cgroup.empty()) + return {}; /// Return the bottom-most nested current memory file. If there is no such file at the current /// level, try again at the parent level as memory settings are inherited. @@ -138,7 +140,7 @@ std::optional getCgroupsV2Path() { const auto current_path = current_cgroup / "memory.current"; const auto stat_path = current_cgroup / "memory.stat"; - if (std::filesystem::exists(current_path) && std::filesystem::exists(stat_path)) + if (fs::exists(current_path) && fs::exists(stat_path)) return {current_cgroup}; current_cgroup = current_cgroup.parent_path(); } @@ -148,7 +150,7 @@ std::optional getCgroupsV2Path() std::optional getCgroupsV1Path() { auto path = default_cgroups_mount / "memory/memory.stat"; - if (!std::filesystem::exists(path)) + if (!fs::exists(path)) return {}; return {default_cgroups_mount / "memory"}; } diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h new file mode 100644 index 00000000000..9f80ff727b4 --- /dev/null +++ b/src/Common/CollectionOfDerived.h @@ -0,0 +1,187 @@ +#pragma once + +#include + +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/* This is a collections of objects derived from ItemBase. +* Collection contains no more than one instance for each derived type. +* The derived type is used to access the instance. 
+*/ + +template +class CollectionOfDerivedItems +{ +public: + using Self = CollectionOfDerivedItems; + using ItemPtr = std::shared_ptr; + +private: + struct Rec + { + std::type_index type_idx; + ItemPtr ptr; + + bool operator<(const Rec & other) const + { + return type_idx < other.type_idx; + } + + bool operator<(const std::type_index & value) const + { + return type_idx < value; + } + + bool operator==(const Rec & other) const + { + return type_idx == other.type_idx; + } + }; + using Records = std::vector; + +public: + void swap(Self & other) noexcept + { + records.swap(other.records); + } + + void clear() + { + records.clear(); + } + + bool empty() const + { + return records.empty(); + } + + size_t size() const + { + return records.size(); + } + + Self clone() const + { + Self result; + result.records.reserve(records.size()); + for (const auto & rec : records) + result.records.emplace_back(rec.type_idx, rec.ptr->clone()); + return result; + } + + // append items from the other instance only if there is no such item in the current instance + void appendIfUniq(Self && other) + { + auto middle_idx = records.size(); + std::move(other.records.begin(), other.records.end(), std::back_inserter(records)); + // merge is stable + std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end()); + // remove duplicates + records.erase(std::unique(records.begin(), records.end()), records.end()); + + assert(std::is_sorted(records.begin(), records.end())); + assert(isUniqTypes()); + } + + template + void add(std::shared_ptr info) + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); + return addImpl(std::type_index(typeid(T)), std::move(info)); + } + + template + std::shared_ptr get() const + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); + auto it = getImpl(std::type_index(typeid(T))); + if (it == records.cend()) + return nullptr; + auto cast = std::dynamic_pointer_cast(it->ptr); + chassert(cast); + return cast; + } + + template + std::shared_ptr extract() + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); + auto it = getImpl(std::type_index(typeid(T))); + if (it == records.cend()) + return nullptr; + auto cast = std::dynamic_pointer_cast(it->ptr); + chassert(cast); + + records.erase(it); + return cast; + } + + std::string debug() const + { + std::string result; + + for (auto & rec : records) + { + result.append(rec.type_idx.name()); + result.append(" "); + } + + return result; + } + +private: + bool isUniqTypes() const + { + auto uniq_it = std::adjacent_find(records.begin(), records.end()); + return uniq_it == records.end(); + } + + void addImpl(std::type_index type_idx, ItemPtr item) + { + auto it = std::lower_bound(records.begin(), records.end(), type_idx); + + if (it == records.end()) + { + records.emplace_back(type_idx, item); + return; + } + + if (it->type_idx == type_idx) + throw Exception(ErrorCodes::LOGICAL_ERROR, "inserted items must be unique by their type, type {} is inserted twice", type_idx.name()); + + + records.emplace(it, type_idx, item); + } + + Records::const_iterator getImpl(std::type_index type_idx) const + { + auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx); + + if (it == records.cend()) + return records.cend(); + + if (it->type_idx != type_idx) + return records.cend(); + + return it; + } + + Records records; +}; + +} diff --git a/src/Common/Config/AbstractConfigurationComparison.cpp
b/src/Common/Config/AbstractConfigurationComparison.cpp index 607b583cf31..80c837ed43b 100644 --- a/src/Common/Config/AbstractConfigurationComparison.cpp +++ b/src/Common/Config/AbstractConfigurationComparison.cpp @@ -38,7 +38,7 @@ namespace std::erase_if(left_subkeys, [&](const String & key) { return ignore_keys->contains(key); }); std::erase_if(right_subkeys, [&](const String & key) { return ignore_keys->contains(key); }); -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) /// Compound `ignore_keys` are not yet implemented. for (const auto & ignore_key : *ignore_keys) chassert(ignore_key.find('.') == std::string_view::npos); diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 7c97e73f278..1011ab12d15 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -234,10 +234,10 @@ M(PartsCommitted, "Deprecated. See PartsActive.") \ M(PartsPreActive, "The part is in data_parts, but not used for SELECTs.") \ M(PartsActive, "Active data part, used by current and upcoming SELECTs.") \ - M(AttachedDatabase, "Active database, used by current and upcoming SELECTs.") \ - M(AttachedTable, "Active table, used by current and upcoming SELECTs.") \ - M(AttachedView, "Active view, used by current and upcoming SELECTs.") \ - M(AttachedDictionary, "Active dictionary, used by current and upcoming SELECTs.") \ + M(AttachedDatabase, "Active databases.") \ + M(AttachedTable, "Active tables.") \ + M(AttachedView, "Active views.") \ + M(AttachedDictionary, "Active dictionaries.") \ M(PartsOutdated, "Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.") \ M(PartsDeleting, "Not active data part with identity refcounter, it is deleting right now by a cleaner.") \ M(PartsDeleteOnDestroy, "Part was moved to another disk and should be deleted in own destructor.") \ diff --git a/src/Common/DateLUT.cpp b/src/Common/DateLUT.cpp index 3a20fb1a125..f9bbe2fbf40 100644 --- a/src/Common/DateLUT.cpp +++ b/src/Common/DateLUT.cpp @@ -2,7 +2,9 @@ #include #include +#include #include +#include #include #include diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 111280074dd..33befa64946 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -64,7 +64,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc { // In debug builds and builds with sanitizers, treat LOGICAL_ERROR as an assertion failure. // Log the message before we fail. -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD if (code == ErrorCodes::LOGICAL_ERROR) { abortOnFailedAssertion(msg, trace.data(), 0, trace.size()); @@ -443,7 +443,7 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b } catch (...) {} // NOLINT(bugprone-empty-catch) -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD try { throw; diff --git a/src/Common/Exception.h b/src/Common/Exception.h index a4774a89f6a..4e54c411bf1 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -244,6 +244,15 @@ private: const char * className() const noexcept override { return "DB::ErrnoException"; } }; +/// An exception to use in unit tests to test interfaces. +/// It is distinguished from others, so it does not have to be logged. +class TestException : public Exception +{ +public: + using Exception::Exception; +}; + + using Exceptions = std::vector; /** Try to write an exception to the log (and forget about it). 
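The CollectionOfDerivedItems container and the TestException class introduced above are small APIs whose intended use is easy to misread from the diff alone, so two minimal, hypothetical usage sketches follow. They are not part of the patch: the type and test names (Info, AggregatedInfo, TestExceptionExample) are invented for illustration, and the exception sketch assumes the usual DB::Exception constructor taking an error code and a format string, as used elsewhere in this diff.

    // Hypothetical sketch: CollectionOfDerivedItems keeps at most one item per derived type
    // and lets callers look items up by that derived type.
    #include <Common/CollectionOfDerived.h>
    #include <memory>

    struct Info
    {
        virtual ~Info() = default;
        virtual std::shared_ptr<Info> clone() const = 0;
    };

    struct AggregatedInfo : public Info
    {
        size_t bucket = 0;
        std::shared_ptr<Info> clone() const override { return std::make_shared<AggregatedInfo>(*this); }
    };

    void exampleUsage()
    {
        DB::CollectionOfDerivedItems<Info> infos;
        infos.add(std::make_shared<AggregatedInfo>());   /// adding a second AggregatedInfo would throw LOGICAL_ERROR
        if (auto agg = infos.get<AggregatedInfo>())      /// lookup is keyed by the derived type
            agg->bucket = 42;
        auto taken = infos.extract<AggregatedInfo>();    /// removes the item from the collection and returns it
    }

    // Hypothetical sketch: a unit test can throw DB::TestException to exercise an error path
    // without the failure being treated as a regular, logged exception.
    #include <Common/Exception.h>
    #include <gtest/gtest.h>

    namespace DB::ErrorCodes { extern const int LOGICAL_ERROR; }

    TEST(TestExceptionExample, ErrorPathIsDistinct)
    {
        EXPECT_THROW(
            throw DB::TestException(DB::ErrorCodes::LOGICAL_ERROR, "simulated failure"),
            DB::TestException);
    }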
diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp index 5454cba8e2e..7b8b5036af0 100644 --- a/src/Common/FailPoint.cpp +++ b/src/Common/FailPoint.cpp @@ -57,7 +57,8 @@ static struct InitFiu PAUSEABLE_ONCE(finish_clean_quorum_failed_parts) \ PAUSEABLE(dummy_pausable_failpoint) \ ONCE(execute_query_calling_empty_set_result_func_on_exception) \ - ONCE(receive_timeout_on_table_status_response) + ONCE(receive_timeout_on_table_status_response) \ + REGULAR(keepermap_fail_drop_data) \ namespace FailPoints diff --git a/src/Common/GWPAsan.cpp b/src/Common/GWPAsan.cpp index 48fbd07ec34..de6991191ea 100644 --- a/src/Common/GWPAsan.cpp +++ b/src/Common/GWPAsan.cpp @@ -217,6 +217,13 @@ void printReport([[maybe_unused]] uintptr_t fault_address) reinterpret_cast(trace.data()), 0, trace_length, [&](const auto line) { LOG_FATAL(logger, fmt::runtime(line)); }); } +std::atomic init_finished = false; + +void initFinished() +{ + init_finished.store(true, std::memory_order_relaxed); +} + std::atomic force_sample_probability = 0.0; void setForceSampleProbability(double value) diff --git a/src/Common/GWPAsan.h b/src/Common/GWPAsan.h index b3215c6157e..846c3417db4 100644 --- a/src/Common/GWPAsan.h +++ b/src/Common/GWPAsan.h @@ -19,12 +19,30 @@ bool isGWPAsanError(uintptr_t fault_address); void printReport(uintptr_t fault_address); +extern std::atomic init_finished; + +void initFinished(); + extern std::atomic force_sample_probability; void setForceSampleProbability(double value); +/** + * We'd like to postpone sampling allocations until the startup is finished. There are mainly + * two reasons for that: + * + * - To avoid complex issues with initialization order + * - Don't waste MaxSimultaneousAllocations on global objects as it's not useful +*/ +inline bool shouldSample() +{ + return init_finished.load(std::memory_order_relaxed) && GuardedAlloc.shouldSample(); +} + inline bool shouldForceSample() { + if (!init_finished.load(std::memory_order_relaxed)) + return false; std::bernoulli_distribution dist(force_sample_probability.load(std::memory_order_relaxed)); return dist(thread_local_rng); } diff --git a/src/Common/GetPriorityForLoadBalancing.cpp b/src/Common/GetPriorityForLoadBalancing.cpp index dc5704ef6b5..77b7867b45d 100644 --- a/src/Common/GetPriorityForLoadBalancing.cpp +++ b/src/Common/GetPriorityForLoadBalancing.cpp @@ -1,3 +1,4 @@ +#include #include #include diff --git a/src/Common/GetPriorityForLoadBalancing.h b/src/Common/GetPriorityForLoadBalancing.h index 01dae9a1289..d4c68803866 100644 --- a/src/Common/GetPriorityForLoadBalancing.h +++ b/src/Common/GetPriorityForLoadBalancing.h @@ -1,6 +1,10 @@ #pragma once -#include +#include +#include + +#include +#include namespace DB { diff --git a/src/Common/IFactoryWithAliases.h b/src/Common/IFactoryWithAliases.h index 74d4b6e3bcb..431e5c7b733 100644 --- a/src/Common/IFactoryWithAliases.h +++ b/src/Common/IFactoryWithAliases.h @@ -39,16 +39,16 @@ protected: public: /// For compatibility with SQL, it's possible to specify that certain function name is case insensitive. - enum CaseSensitiveness + enum Case { - CaseSensitive, - CaseInsensitive + Sensitive, + Insensitive }; /** Register additional name for value * real_name have to be already registered.
*/ - void registerAlias(const String & alias_name, const String & real_name, CaseSensitiveness case_sensitiveness = CaseSensitive) + void registerAlias(const String & alias_name, const String & real_name, Case case_sensitiveness = Sensitive) { const auto & creator_map = getMap(); const auto & case_insensitive_creator_map = getCaseInsensitiveMap(); @@ -66,12 +66,12 @@ public: } /// We need sure the real_name exactly exists when call the function directly. - void registerAliasUnchecked(const String & alias_name, const String & real_name, CaseSensitiveness case_sensitiveness = CaseSensitive) + void registerAliasUnchecked(const String & alias_name, const String & real_name, Case case_sensitiveness = Sensitive) { String alias_name_lowercase = Poco::toLower(alias_name); const String factory_name = getFactoryName(); - if (case_sensitiveness == CaseInsensitive) + if (case_sensitiveness == Insensitive) { if (!case_insensitive_aliases.emplace(alias_name_lowercase, real_name).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: case insensitive alias name '{}' is not unique", factory_name, alias_name); diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 28cfa98666a..ac412684cf7 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -192,7 +192,7 @@ void MemoryTracker::debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused] { /// Big allocations through allocNoThrow (without checking memory limits) may easily lead to OOM (and it's hard to debug). /// Let's find them. -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD if (size < 0) return; diff --git a/src/Common/NamedCollections/NamedCollectionsFactory.cpp b/src/Common/NamedCollections/NamedCollectionsFactory.cpp index 2faea1957ba..bc283d2be1a 100644 --- a/src/Common/NamedCollections/NamedCollectionsFactory.cpp +++ b/src/Common/NamedCollections/NamedCollectionsFactory.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include namespace DB diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp index b3671350f92..36191b89e86 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -115,32 +116,32 @@ public: return elements; } - bool exists(const std::string & path) const override + bool exists(const std::string & file_name) const override { - return fs::exists(getPath(path)); + return fs::exists(getPath(file_name)); } - std::string read(const std::string & path) const override + std::string read(const std::string & file_name) const override { - ReadBufferFromFile in(getPath(path)); + ReadBufferFromFile in(getPath(file_name)); std::string data; readStringUntilEOF(data, in); return data; } - void write(const std::string & path, const std::string & data, bool replace) override + void write(const std::string & file_name, const std::string & data, bool replace) override { - if (!replace && fs::exists(path)) + if (!replace && fs::exists(file_name)) { throw Exception( ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, "Metadata file {} for named collection already exists", - path); + file_name); } fs::create_directories(root_path); - auto tmp_path = getPath(path + ".tmp"); + auto tmp_path = getPath(file_name + ".tmp"); WriteBufferFromFile out(tmp_path, data.size(), O_WRONLY | O_CREAT | O_EXCL); writeString(data, out); @@ -149,28 
+150,32 @@ public: out.sync(); out.close(); - fs::rename(tmp_path, getPath(path)); + fs::rename(tmp_path, getPath(file_name)); } - void remove(const std::string & path) override + void remove(const std::string & file_name) override { - if (!removeIfExists(getPath(path))) + if (!removeIfExists(file_name)) { throw Exception( ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST, - "Cannot remove `{}`, because it doesn't exist", path); + "Cannot remove `{}`, because it doesn't exist", file_name); } } - bool removeIfExists(const std::string & path) override + bool removeIfExists(const std::string & file_name) override { - return fs::remove(getPath(path)); + return fs::remove(getPath(file_name)); } private: - std::string getPath(const std::string & path) const + std::string getPath(const std::string & file_name) const { - return fs::path(root_path) / path; + const auto file_name_as_path = fs::path(file_name); + if (file_name_as_path.is_absolute()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Filename {} cannot be an absolute path", file_name); + + return fs::path(root_path) / file_name_as_path; } /// Delete .tmp files. They could be left undeleted in case of @@ -263,49 +268,49 @@ public: return children; } - bool exists(const std::string & path) const override + bool exists(const std::string & file_name) const override { - return getClient()->exists(getPath(path)); + return getClient()->exists(getPath(file_name)); } - std::string read(const std::string & path) const override + std::string read(const std::string & file_name) const override { - return getClient()->get(getPath(path)); + return getClient()->get(getPath(file_name)); } - void write(const std::string & path, const std::string & data, bool replace) override + void write(const std::string & file_name, const std::string & data, bool replace) override { if (replace) { - getClient()->createOrUpdate(getPath(path), data, zkutil::CreateMode::Persistent); + getClient()->createOrUpdate(getPath(file_name), data, zkutil::CreateMode::Persistent); } else { - auto code = getClient()->tryCreate(getPath(path), data, zkutil::CreateMode::Persistent); + auto code = getClient()->tryCreate(getPath(file_name), data, zkutil::CreateMode::Persistent); if (code == Coordination::Error::ZNODEEXISTS) { throw Exception( ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, "Metadata file {} for named collection already exists", - path); + file_name); } } } - void remove(const std::string & path) override + void remove(const std::string & file_name) override { - getClient()->remove(getPath(path)); + getClient()->remove(getPath(file_name)); } - bool removeIfExists(const std::string & path) override + bool removeIfExists(const std::string & file_name) override { - auto code = getClient()->tryRemove(getPath(path)); + auto code = getClient()->tryRemove(getPath(file_name)); if (code == Coordination::Error::ZOK) return true; if (code == Coordination::Error::ZNONODE) return false; - throw Coordination::Exception::fromPath(code, getPath(path)); + throw Coordination::Exception::fromPath(code, getPath(file_name)); } private: @@ -319,9 +324,13 @@ private: return zookeeper_client; } - std::string getPath(const std::string & path) const + std::string getPath(const std::string & file_name) const { - return fs::path(root_path) / path; + const auto file_name_as_path = fs::path(file_name); + if (file_name_as_path.is_absolute()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Filename {} cannot be an absolute path", file_name); + + return fs::path(root_path) / file_name_as_path; } }; diff --git 
a/src/Common/PageCache.cpp b/src/Common/PageCache.cpp index 56bd8c1a339..d719a387e14 100644 --- a/src/Common/PageCache.cpp +++ b/src/Common/PageCache.cpp @@ -424,7 +424,7 @@ static void logUnexpectedSyscallError(std::string name) { std::string message = fmt::format("{} failed: {}", name, errnoToString()); LOG_WARNING(&Poco::Logger::get("PageCache"), "{}", message); -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) volatile bool true_ = true; if (true_) // suppress warning about missing [[noreturn]] abortOnFailedAssertion(message); diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 871ba7cab8b..2b090136e2e 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -442,8 +442,6 @@ The server successfully detected this situation and will download merged part fr M(ReadBufferFromS3InitMicroseconds, "Time spent initializing connection to S3.") \ M(ReadBufferFromS3Bytes, "Bytes read from S3.") \ M(ReadBufferFromS3RequestsErrors, "Number of exceptions while reading from S3.") \ - M(ReadBufferFromS3ResetSessions, "Number of HTTP sessions that were reset in ReadBufferFromS3.") \ - M(ReadBufferFromS3PreservedSessions, "Number of HTTP sessions that were preserved in ReadBufferFromS3.") \ \ M(WriteBufferFromS3Microseconds, "Time spent on writing to S3.") \ M(WriteBufferFromS3Bytes, "Bytes written to S3.") \ diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index 0b482cb09be..8f0fb3cac6c 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -1,6 +1,7 @@ #include "ProgressIndication.h" #include #include +#include #include #include #include diff --git a/src/Common/ProgressIndication.h b/src/Common/ProgressIndication.h index ae39fb49bcc..9baa1c7ffc4 100644 --- a/src/Common/ProgressIndication.h +++ b/src/Common/ProgressIndication.h @@ -1,14 +1,15 @@ #pragma once -#include -#include -#include #include #include #include #include #include +#include +#include +#include +#include namespace DB { @@ -71,11 +72,6 @@ public: /// How much seconds passed since query execution start. double elapsedSeconds() const { return getElapsedNanoseconds() / 1e9; } - void updateThreadEventData(HostToTimesMap & new_hosts_data); - -private: - double getCPUUsage(); - struct MemoryUsage { UInt64 total = 0; @@ -85,6 +81,11 @@ private: MemoryUsage getMemoryUsage() const; + void updateThreadEventData(HostToTimesMap & new_hosts_data); + +private: + double getCPUUsage(); + UInt64 getElapsedNanoseconds() const; /// This flag controls whether to show the progress bar. 
We start showing it after diff --git a/src/Common/SignalHandlers.cpp b/src/Common/SignalHandlers.cpp new file mode 100644 index 00000000000..a4b7784df5c --- /dev/null +++ b/src/Common/SignalHandlers.cpp @@ -0,0 +1,635 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#pragma clang diagnostic ignored "-Wreserved-identifier" + +namespace DB +{ +namespace ErrorCodes +{ +extern const int CANNOT_SET_SIGNAL_HANDLER; +extern const int CANNOT_SEND_SIGNAL; +} +} + +using namespace DB; + + +void call_default_signal_handler(int sig) +{ + if (SIG_ERR == signal(sig, SIG_DFL)) + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); + + if (0 != raise(sig)) + throw ErrnoException(ErrorCodes::CANNOT_SEND_SIGNAL, "Cannot send signal"); +} + + +void writeSignalIDtoSignalPipe(int sig) +{ + auto saved_errno = errno; /// We must restore previous value of errno in signal handler. + + char buf[signal_pipe_buf_size]; + auto & signal_pipe = HandledSignals::instance().signal_pipe; + WriteBufferFromFileDescriptor out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf); + writeBinary(sig, out); + out.next(); + + errno = saved_errno; +} + +void closeLogsSignalHandler(int sig, siginfo_t *, void *) +{ + DENY_ALLOCATIONS_IN_SCOPE; + writeSignalIDtoSignalPipe(sig); +} + +void terminateRequestedSignalHandler(int sig, siginfo_t *, void *) +{ + DENY_ALLOCATIONS_IN_SCOPE; + writeSignalIDtoSignalPipe(sig); +} + + +void signalHandler(int sig, siginfo_t * info, void * context) +{ + if (asynchronous_stack_unwinding && sig == SIGSEGV) + siglongjmp(asynchronous_stack_unwinding_signal_jump_buffer, 1); + + DENY_ALLOCATIONS_IN_SCOPE; + auto saved_errno = errno; /// We must restore previous value of errno in signal handler. + + char buf[signal_pipe_buf_size]; + auto & signal_pipe = HandledSignals::instance().signal_pipe; + WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf); + + const ucontext_t * signal_context = reinterpret_cast(context); + const StackTrace stack_trace(*signal_context); + +#if USE_GWP_ASAN + if (const auto fault_address = reinterpret_cast(info->si_addr); + GWPAsan::isGWPAsanError(fault_address)) + GWPAsan::printReport(fault_address); +#endif + + writeBinary(sig, out); + writePODBinary(*info, out); + writePODBinary(signal_context, out); + writePODBinary(stack_trace, out); + writeVectorBinary(Exception::enable_job_stack_trace ? Exception::thread_frame_pointers : std::vector{}, out); + writeBinary(static_cast(getThreadId()), out); + writePODBinary(current_thread, out); + + out.next(); + + if (sig != SIGTSTP) /// This signal is used for debugging. + { + /// The time that is usually enough for separate thread to print info into log. + /// Under MSan full stack unwinding with DWARF info about inline functions takes 101 seconds in one case. + for (size_t i = 0; i < 300; ++i) + { + /// We will synchronize with the thread printing the messages with an atomic variable to finish earlier. + if (HandledSignals::instance().fatal_error_printed.test()) + break; + + /// This coarse method of synchronization is perfectly ok for fatal signals. 
+ sleepForSeconds(1); + } + + /// Wait for all logs flush operations + sleepForSeconds(3); + call_default_signal_handler(sig); + } + + errno = saved_errno; +} + + +[[noreturn]] void terminate_handler() +{ + static thread_local bool terminating = false; + if (terminating) + abort(); + + terminating = true; + + std::string log_message; + + if (std::current_exception()) + log_message = "Terminate called for uncaught exception:\n" + getCurrentExceptionMessage(true); + else + log_message = "Terminate called without an active exception"; + + /// POSIX.1 says that write(2)s of less than PIPE_BUF bytes must be atomic - man 7 pipe + /// And the buffer should not be too small because our exception messages can be large. + static constexpr size_t buf_size = PIPE_BUF; + + if (log_message.size() > buf_size - 16) + log_message.resize(buf_size - 16); + + char buf[buf_size]; + auto & signal_pipe = HandledSignals::instance().signal_pipe; + WriteBufferFromFileDescriptor out(signal_pipe.fds_rw[1], buf_size, buf); + + writeBinary(static_cast(SignalListener::StdTerminate), out); + writeBinary(static_cast(getThreadId()), out); + writeBinary(log_message, out); + out.next(); + + abort(); +} + +#if defined(SANITIZER) +template +struct ValueHolder +{ + ValueHolder(T value_) : value(value_) + {} + + T value; +}; + +extern "C" void __sanitizer_set_death_callback(void (*)()); + +/// Sanitizers may not expect some function calls from death callback. +/// Let's try to disable instrumentation to avoid possible issues. +/// However, this callback may call other functions that are still instrumented. +/// We can try [[clang::always_inline]] attribute for statements in future (available in clang-15) +/// See https://github.com/google/sanitizers/issues/1543 and https://github.com/google/sanitizers/issues/1549. +static DISABLE_SANITIZER_INSTRUMENTATION void sanitizerDeathCallback() +{ + DENY_ALLOCATIONS_IN_SCOPE; + /// Also need to send data via pipe. Otherwise it may lead to deadlocks or failures in printing diagnostic info. + + char buf[signal_pipe_buf_size]; + auto & signal_pipe = HandledSignals::instance().signal_pipe; + WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf); + + const StackTrace stack_trace; + + writeBinary(SignalListener::SanitizerTrap, out); + writePODBinary(stack_trace, out); + /// We create a dummy struct with a constructor so DISABLE_SANITIZER_INSTRUMENTATION is not applied to it + /// otherwise, Memory sanitizer can't know that values initialized inside this function are actually initialized + /// because instrumentation is disabled, leading to false positives later on + ValueHolder thread_id{static_cast(getThreadId())}; + writeBinary(thread_id.value, out); + writePODBinary(current_thread, out); + + out.next(); + + /// The time that is usually enough for separate thread to print info into log.
+ sleepForSeconds(20); +} +#endif + + +void HandledSignals::addSignalHandler(const std::vector & signals, signal_function handler, bool register_signal) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO; + +#if defined(OS_DARWIN) + sigemptyset(&sa.sa_mask); + for (auto signal : signals) + sigaddset(&sa.sa_mask, signal); +#else + if (sigemptyset(&sa.sa_mask)) + throw Poco::Exception("Cannot set signal handler."); + + for (auto signal : signals) + if (sigaddset(&sa.sa_mask, signal)) + throw Poco::Exception("Cannot set signal handler."); +#endif + + for (auto signal : signals) + if (sigaction(signal, &sa, nullptr)) + throw Poco::Exception("Cannot set signal handler."); + + if (register_signal) + std::copy(signals.begin(), signals.end(), std::back_inserter(handled_signals)); +} + +void blockSignals(const std::vector & signals) +{ + sigset_t sig_set; + +#if defined(OS_DARWIN) + sigemptyset(&sig_set); + for (auto signal : signals) + sigaddset(&sig_set, signal); +#else + if (sigemptyset(&sig_set)) + throw Poco::Exception("Cannot block signal."); + + for (auto signal : signals) + if (sigaddset(&sig_set, signal)) + throw Poco::Exception("Cannot block signal."); +#endif + + if (pthread_sigmask(SIG_BLOCK, &sig_set, nullptr)) + throw Poco::Exception("Cannot block signal."); +} + + +void SignalListener::run() +{ + static_assert(PIPE_BUF >= 512); + static_assert(signal_pipe_buf_size <= PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic and the minimal known PIPE_BUF across supported platforms is 512"); + char buf[signal_pipe_buf_size]; + auto & signal_pipe = HandledSignals::instance().signal_pipe; + ReadBufferFromFileDescriptor in(signal_pipe.fds_rw[0], signal_pipe_buf_size, buf); + + while (!in.eof()) + { + int sig = 0; + readBinary(sig, in); + // We may log some specific signals afterwards, with different log + // levels and more info, but for completeness we log all signals + // here at trace level. + // Don't use strsignal here, because it's not thread-safe. + LOG_TRACE(log, "Received signal {}", sig); + + if (sig == StopThread) + { + LOG_INFO(log, "Stop SignalListener thread"); + break; + } + else if (sig == SIGHUP) + { + LOG_DEBUG(log, "Received signal to close logs."); + BaseDaemon::instance().closeLogs(BaseDaemon::instance().logger()); + LOG_INFO(log, "Opened new log file after received signal."); + } + else if (sig == StdTerminate) + { + UInt32 thread_num; + std::string message; + + readBinary(thread_num, in); + readBinary(message, in); + + onTerminate(message, thread_num); + } + else if (sig == SIGINT || + sig == SIGQUIT || + sig == SIGTERM) + { + if (daemon) + daemon->handleSignal(sig); + } + else + { + siginfo_t info{}; + ucontext_t * context{}; + StackTrace stack_trace(NoCapture{}); + std::vector thread_frame_pointers; + UInt32 thread_num{}; + ThreadStatus * thread_ptr{}; + + if (sig != SanitizerTrap) + { + readPODBinary(info, in); + readPODBinary(context, in); + } + + readPODBinary(stack_trace, in); + if (sig != SanitizerTrap) + readVectorBinary(thread_frame_pointers, in); + readBinary(thread_num, in); + readPODBinary(thread_ptr, in); + + /// This allows to receive more signals if failure happens inside onFault function. + /// Example: segfault while symbolizing stack trace. + try + { + std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_frame_pointers, thread_num, thread_ptr); }).detach(); + } + catch (...) 
+ { + /// Likely cannot allocate thread + onFault(sig, info, context, stack_trace, thread_frame_pointers, thread_num, thread_ptr); + } + } + } +} + +void SignalListener::onTerminate(std::string_view message, UInt32 thread_num) const +{ + size_t pos = message.find('\n'); + + LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) {}", + VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "", thread_num, message.substr(0, pos)); + + /// Print trace from std::terminate exception line-by-line to make it easy for grep. + while (pos != std::string_view::npos) + { + ++pos; + size_t next_pos = message.find('\n', pos); + size_t size = next_pos; + if (next_pos != std::string_view::npos) + size = next_pos - pos; + + LOG_FATAL(log, fmt::runtime(message.substr(pos, size))); + pos = next_pos; + } +} + +void SignalListener::onFault( + int sig, + const siginfo_t & info, + ucontext_t * context, + const StackTrace & stack_trace, + const std::vector & thread_frame_pointers, + UInt32 thread_num, + DB::ThreadStatus * thread_ptr) const +try +{ + ThreadStatus thread_status; + + /// First log those fields that are safe to access and that should not cause new fault. + /// That way we will have some duplicated info in the log but we don't lose important info + /// in case of double fault. + + LOG_FATAL(log, "########## Short fault info ############"); + LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}", + VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "", + thread_num, sig); + + std::string signal_description = "Unknown signal"; + + /// Some of these are not really signals, but our own indications on failure reason. + if (sig == StdTerminate) + signal_description = "std::terminate"; + else if (sig == SanitizerTrap) + signal_description = "sanitizer trap"; + else if (sig >= 0) + signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context + + LOG_FATAL(log, "Signal description: {}", signal_description); + + String error_message; + + if (sig != SanitizerTrap) + error_message = signalToErrorMessage(sig, info, *context); + else + error_message = "Sanitizer trap."; + + LOG_FATAL(log, fmt::runtime(error_message)); + + String bare_stacktrace_str; + if (stack_trace.getSize()) + { + /// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace. + /// NOTE: This still requires memory allocations and mutex lock inside logger. + /// BTW we can also print it to stderr using write syscalls. + + WriteBufferFromOwnString bare_stacktrace; + writeString("Stack trace:", bare_stacktrace); + for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i) + { + writeChar(' ', bare_stacktrace); + writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace); + } + + LOG_FATAL(log, fmt::runtime(bare_stacktrace.str())); + bare_stacktrace_str = bare_stacktrace.str(); + } + + /// Now try to access potentially unsafe data in thread_ptr. + + String query_id; + String query; + + /// Send logs from this thread to client if possible. + /// It will allow the client to see failure messages directly.
+ if (thread_ptr) + { + query_id = thread_ptr->getQueryId(); + query = thread_ptr->getQueryForLog(); + + if (auto logs_queue = thread_ptr->getInternalTextLogsQueue()) + { + CurrentThread::attachInternalTextLogsQueue(logs_queue, LogsLevel::trace); + } + } + + LOG_FATAL(log, "########################################"); + + if (query_id.empty()) + { + LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (no query) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "", + thread_num, signal_description, sig); + } + else + { + LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "", + thread_num, query_id, query, signal_description, sig); + } + + LOG_FATAL(log, fmt::runtime(error_message)); + + if (!bare_stacktrace_str.empty()) + { + LOG_FATAL(log, fmt::runtime(bare_stacktrace_str)); + } + + /// Write symbolized stack trace line by line for better grep-ability. + stack_trace.toStringEveryLine([&](std::string_view s) { LOG_FATAL(log, fmt::runtime(s)); }); + + /// In case it's a scheduled job write all previous jobs origins call stacks + std::for_each(thread_frame_pointers.rbegin(), thread_frame_pointers.rend(), + [this](const StackTrace::FramePointers & frame_pointers) + { + if (size_t size = std::ranges::find(frame_pointers, nullptr) - frame_pointers.begin()) + { + LOG_FATAL(log, "========================================"); + WriteBufferFromOwnString bare_stacktrace; + writeString("Job's origin stack trace:", bare_stacktrace); + std::for_each_n(frame_pointers.begin(), size, + [&bare_stacktrace](const void * ptr) + { + writeChar(' ', bare_stacktrace); + writePointerHex(ptr, bare_stacktrace); + } + ); + + LOG_FATAL(log, fmt::runtime(bare_stacktrace.str())); + + StackTrace::toStringEveryLine(const_cast(frame_pointers.data()), 0, size, [this](std::string_view s) { LOG_FATAL(log, fmt::runtime(s)); }); + } + } + ); + + +#if defined(OS_LINUX) + /// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace. + /// Please keep the below log messages in-sync with the ones in programs/server/Server.cpp + + if (daemon && daemon->stored_binary_hash.empty()) + { + LOG_FATAL(log, "Integrity check of the executable skipped because the reference checksum could not be read."); + } + else if (daemon) + { + String calculated_binary_hash = getHashOfLoadedBinaryHex(); + if (calculated_binary_hash == daemon->stored_binary_hash) + { + LOG_FATAL(log, "Integrity check of the executable successfully passed (checksum: {})", calculated_binary_hash); + } + else + { + LOG_FATAL( + log, + "Calculated checksum of the executable ({0}) does not correspond" + " to the reference checksum stored in the executable ({1})." + " This may indicate one of the following:" + " - the executable was changed just after startup;" + " - the executable was corrupted on disk due to faulty hardware;" + " - the loaded executable was corrupted in memory due to faulty hardware;" + " - the file was intentionally modified;" + " - a logical error in the code.", + calculated_binary_hash, + daemon->stored_binary_hash); + } + } +#endif + + /// Write crash to system.crash_log table if available. 
+ if (collectCrashLog) + collectCrashLog(sig, thread_num, query_id, stack_trace); + + Context::getGlobalContextInstance()->handleCrash(); + + /// Send crash report to developers (if configured) + if (sig != SanitizerTrap) + { + if (daemon) + { + if (auto * sentry = SentryWriter::getInstance()) + sentry->onSignal(sig, error_message, stack_trace.getFramePointers(), stack_trace.getOffset(), stack_trace.getSize()); + } + + /// Advise the user to send it manually. + if (std::string_view(VERSION_OFFICIAL).contains("official build")) + { + const auto & date_lut = DateLUT::instance(); + + /// Approximate support period, upper bound. + if (time(nullptr) - date_lut.makeDate(2000 + VERSION_MAJOR, VERSION_MINOR, 1) < (365 + 30) * 86400) + { + LOG_FATAL(log, "Report this error to https://github.com/ClickHouse/ClickHouse/issues"); + } + else + { + LOG_FATAL(log, "ClickHouse version {} is old and should be upgraded to the latest version.", VERSION_STRING); + } + } + else + { + LOG_FATAL(log, "This ClickHouse version is not official and should be upgraded to the official build."); + } + } + + /// List changed settings. + if (!query_id.empty()) + { + ContextPtr query_context = thread_ptr->getQueryContext(); + if (query_context) + { + String changed_settings = query_context->getSettingsRef().toString(); + + if (changed_settings.empty()) + LOG_FATAL(log, "No settings were changed"); + else + LOG_FATAL(log, "Changed settings: {}", changed_settings); + } + } + + /// When everything is done, we will try to send these error messages to the client. + if (thread_ptr) + thread_ptr->onFatalError(); + + HandledSignals::instance().fatal_error_printed.test_and_set(); +} +catch (...) +{ + /// onFault is called from the std::thread, and it should catch all exceptions; otherwise, you can get unrelated fatal errors. + PreformattedMessage message = getCurrentExceptionMessageAndPattern(true); + LOG_FATAL(log, message); +} + +HandledSignals::HandledSignals() +{ + signal_pipe.setNonBlockingWrite(); + signal_pipe.tryIncreaseSize(1 << 20); +} + +void HandledSignals::reset() +{ + /// Reset signals to SIG_DFL to avoid trying to write to the signal_pipe that will be closed after. + for (int sig : handled_signals) + { + if (SIG_ERR == signal(sig, SIG_DFL)) + { + try + { + throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); + } + catch (ErrnoException &) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + } + + signal_pipe.close(); +} + +HandledSignals::~HandledSignals() +{ + reset(); +}; + +HandledSignals & HandledSignals::instance() +{ + static HandledSignals res; + return res; +} + +void HandledSignals::setupTerminateHandler() +{ + std::set_terminate(terminate_handler); +} + +void HandledSignals::setupCommonDeadlySignalHandlers() +{ + /// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at any time.
+ addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, true); + +#if defined(SANITIZER) + __sanitizer_set_death_callback(sanitizerDeathCallback); +#endif +} + +void HandledSignals::setupCommonTerminateRequestSignalHandlers() +{ + addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, true); +} diff --git a/src/Common/SignalHandlers.h b/src/Common/SignalHandlers.h new file mode 100644 index 00000000000..e7519f7aee2 --- /dev/null +++ b/src/Common/SignalHandlers.h @@ -0,0 +1,110 @@ +#pragma once +#include + +#include +#include +#include +#include +#include +#include + +class BaseDaemon; + +/** Reset signal handler to the default and send signal to itself. + * It's called from user signal handler to write core dump. + */ +void call_default_signal_handler(int sig); + +const size_t signal_pipe_buf_size = + sizeof(int) + + sizeof(siginfo_t) + + sizeof(ucontext_t*) + + sizeof(StackTrace) + + sizeof(UInt64) + + sizeof(UInt32) + + sizeof(void*); + +using signal_function = void(int, siginfo_t*, void*); + +void writeSignalIDtoSignalPipe(int sig); + +/** Signal handler for HUP */ +void closeLogsSignalHandler(int sig, siginfo_t *, void *); + +void terminateRequestedSignalHandler(int sig, siginfo_t *, void *); + + +/** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log. + */ +void signalHandler(int sig, siginfo_t * info, void * context); + + +/** To use with std::set_terminate. + * Collects slightly more info than __gnu_cxx::__verbose_terminate_handler, + * and send it to pipe. Other thread will read this info from pipe and asynchronously write it to log. + * Look at libstdc++-v3/libsupc++/vterminate.cc for example. + */ +[[noreturn]] void terminate_handler(); + +/// Avoid link time dependency on DB/Interpreters - will use this function only when linked. +__attribute__((__weak__)) void collectCrashLog( + Int32 signal, UInt64 thread_id, const String & query_id, const StackTrace & stack_trace); + + +void blockSignals(const std::vector & signals); + + +/** The thread that read info about signal or std::terminate from pipe. + * On HUP, close log files (for new files to be opened later). + * On information about std::terminate, write it to log. + * On other signals, write info to log. 
+ */ +class SignalListener : public Poco::Runnable +{ +public: + static constexpr int StdTerminate = -1; + static constexpr int StopThread = -2; + static constexpr int SanitizerTrap = -3; + + explicit SignalListener(BaseDaemon * daemon_, LoggerPtr log_) + : daemon(daemon_), log(log_) + { + } + + void run() override; + +private: + BaseDaemon * daemon; + LoggerPtr log; + + void onTerminate(std::string_view message, UInt32 thread_num) const; + + void onFault( + int sig, + const siginfo_t & info, + ucontext_t * context, + const StackTrace & stack_trace, + const std::vector & thread_frame_pointers, + UInt32 thread_num, + DB::ThreadStatus * thread_ptr) const; +}; + +struct HandledSignals +{ + std::vector handled_signals; + DB::PipeFDs signal_pipe; + std::atomic_flag fatal_error_printed; + + HandledSignals(); + ~HandledSignals(); + + void setupTerminateHandler(); + void setupCommonDeadlySignalHandlers(); + void setupCommonTerminateRequestSignalHandlers(); + + void addSignalHandler(const std::vector & signals, signal_function handler, bool register_signal); + + void reset(); + + static HandledSignals & instance(); +}; diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp index 7b96d1f075b..719ac9593ff 100644 --- a/src/Common/ThreadFuzzer.cpp +++ b/src/Common/ThreadFuzzer.cpp @@ -12,13 +12,14 @@ #include #include -#include +#include #include #include +#include +#include #include -#include #include "config.h" // USE_JEMALLOC diff --git a/src/Common/TraceSender.cpp b/src/Common/TraceSender.cpp index 91d07367a82..f1adf7c516a 100644 --- a/src/Common/TraceSender.cpp +++ b/src/Common/TraceSender.cpp @@ -23,8 +23,20 @@ namespace DB LazyPipeFDs TraceSender::pipe; +static thread_local bool inside_send = false; void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Extras extras) { + /** The method shouldn't be called recursively or throw exceptions. + * There are several reasons: + * - avoid infinite recursion when some of subsequent functions invoke tracing; + * - avoid inconsistent writes if the method was interrupted by a signal handler in the middle of writing, + * and then another tracing is invoked (e.g., from query profiler). + */ + if (unlikely(inside_send)) + return; + inside_send = true; + DENY_ALLOCATIONS_IN_SCOPE; + constexpr size_t buf_size = sizeof(char) /// TraceCollector stop flag + sizeof(UInt8) /// String size + QUERY_ID_MAX_LEN /// Maximum query_id length @@ -80,6 +92,8 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Ext writePODBinary(extras.increment, out); out.next(); + + inside_send = false; } } diff --git a/src/Common/WeakHash.cpp b/src/Common/WeakHash.cpp index 54d973b6296..cb12df84db1 100644 --- a/src/Common/WeakHash.cpp +++ b/src/Common/WeakHash.cpp @@ -1,2 +1,24 @@ #include +#include +#include +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +void WeakHash32::update(const WeakHash32 & other) +{ + size_t size = data.size(); + if (size != other.data.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match:" + "left size is {}, right size is {}", size, other.data.size()); + + for (size_t i = 0; i < size; ++i) + data[i] = static_cast(intHashCRC32(other.data[i], data[i])); +} + +} diff --git a/src/Common/WeakHash.h b/src/Common/WeakHash.h index b59624e64f2..d4a8d63868c 100644 --- a/src/Common/WeakHash.h +++ b/src/Common/WeakHash.h @@ -11,9 +11,8 @@ namespace DB /// The main purpose why this class needed is to support data initialization. 
Initially, every bit is 1. class WeakHash32 { - static constexpr UInt32 kDefaultInitialValue = ~UInt32(0); - public: + static constexpr UInt32 kDefaultInitialValue = ~UInt32(0); using Container = PaddedPODArray; @@ -22,6 +21,8 @@ public: void reset(size_t size, UInt32 initial_value = kDefaultInitialValue) { data.assign(size, initial_value); } + void update(const WeakHash32 & other); + const Container & getData() const { return data; } Container & getData() { return data; } diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 56db9adb787..01bb508da95 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -13,14 +13,14 @@ #include #include #include -#include #include +#include #include #include #include -#include "Common/ZooKeeper/IKeeper.h" -#include +#include #include +#include #include #include @@ -114,7 +114,11 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr /// availability_zones is empty on server startup or after config reloading /// We will keep the az info when starting new sessions availability_zones = args.availability_zones; - LOG_TEST(log, "Availability zones from config: [{}], client: {}", fmt::join(availability_zones, ", "), args.client_availability_zone); + + LOG_TEST(log, "Availability zones from config: [{}], client: {}", + fmt::join(collections::map(availability_zones, [](auto s){ return DB::quoteString(s); }), ", "), + DB::quoteString(args.client_availability_zone)); + if (args.availability_zone_autodetect) updateAvailabilityZones(); } diff --git a/src/Common/assert_cast.h b/src/Common/assert_cast.h index f9d0bf0e595..7a04372ffad 100644 --- a/src/Common/assert_cast.h +++ b/src/Common/assert_cast.h @@ -25,7 +25,7 @@ namespace DB template inline To assert_cast(From && from) { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD try { if constexpr (std::is_pointer_v) diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index 7e18a93e6ed..34a1add2f0e 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -37,12 +37,12 @@ uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count) /// cgroupsv2 if (cgroupsV2Enabled()) { - /// First, we identify the cgroup the process belongs - std::string cgroup = cgroupV2OfProcess(); - if (cgroup.empty()) + /// First, we identify the path of the cgroup the process belongs + std::filesystem::path cgroup_path = cgroupV2PathOfProcess(); + if (cgroup_path.empty()) return default_cpu_count; - auto current_cgroup = cgroup.empty() ? 
default_cgroups_mount : (default_cgroups_mount / cgroup); + auto current_cgroup = cgroup_path; // Looking for cpu.max in directories from the current cgroup to the top level // It does not stop on the first time since the child could have a greater value than parent @@ -62,7 +62,7 @@ uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count) } current_cgroup = current_cgroup.parent_path(); } - current_cgroup = default_cgroups_mount / cgroup; + current_cgroup = cgroup_path; // Looking for cpuset.cpus.effective in directories from the current cgroup to the top level while (current_cgroup != default_cgroups_mount.parent_path()) { diff --git a/src/Common/getRandomASCIIString.cpp b/src/Common/getRandomASCIIString.cpp index 594b4cd3228..a295277b453 100644 --- a/src/Common/getRandomASCIIString.cpp +++ b/src/Common/getRandomASCIIString.cpp @@ -6,12 +6,17 @@ namespace DB { String getRandomASCIIString(size_t length) +{ + return getRandomASCIIString(length, thread_local_rng); +} + +String getRandomASCIIString(size_t length, pcg64 & rng) { std::uniform_int_distribution distribution('a', 'z'); String res; res.resize(length); for (auto & c : res) - c = distribution(thread_local_rng); + c = distribution(rng); return res; } diff --git a/src/Common/getRandomASCIIString.h b/src/Common/getRandomASCIIString.h index 627d2700ce3..19e1ff7120e 100644 --- a/src/Common/getRandomASCIIString.h +++ b/src/Common/getRandomASCIIString.h @@ -2,11 +2,14 @@ #include +#include + namespace DB { /// Slow random string. Useful for random names and things like this. Not for generating data. String getRandomASCIIString(size_t length); +String getRandomASCIIString(size_t length, pcg64 & rng); } diff --git a/src/Common/memory.h b/src/Common/memory.h index caa0418fa56..d673f954fb2 100644 --- a/src/Common/memory.h +++ b/src/Common/memory.h @@ -37,7 +37,7 @@ requires DB::OptionalArgument inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align) { #if USE_GWP_ASAN - if (unlikely(GWPAsan::GuardedAlloc.shouldSample())) + if (unlikely(GWPAsan::shouldSample())) { if constexpr (sizeof...(TAlign) == 1) { @@ -80,10 +80,10 @@ inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... 
align) throw std::bad_alloc{}; } -inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept +inline ALWAYS_INLINE void * newNoExcept(std::size_t size) noexcept { #if USE_GWP_ASAN - if (unlikely(GWPAsan::GuardedAlloc.shouldSample())) + if (unlikely(GWPAsan::shouldSample())) { if (void * ptr = GWPAsan::GuardedAlloc.allocate(size)) { @@ -99,10 +99,10 @@ inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept return malloc(size); } -inline ALWAYS_INLINE void * newNoExept(std::size_t size, std::align_val_t align) noexcept +inline ALWAYS_INLINE void * newNoExcept(std::size_t size, std::align_val_t align) noexcept { #if USE_GWP_ASAN - if (unlikely(GWPAsan::GuardedAlloc.shouldSample())) + if (unlikely(GWPAsan::shouldSample())) { if (void * ptr = GWPAsan::GuardedAlloc.allocate(size, alignToSizeT(align))) { diff --git a/src/Common/new_delete.cpp b/src/Common/new_delete.cpp index e8151fbe201..80e05fc4ea0 100644 --- a/src/Common/new_delete.cpp +++ b/src/Common/new_delete.cpp @@ -87,7 +87,7 @@ void * operator new(std::size_t size, const std::nothrow_t &) noexcept { AllocationTrace trace; std::size_t actual_size = Memory::trackMemory(size, trace); - void * ptr = Memory::newNoExept(size); + void * ptr = Memory::newNoExcept(size); trace.onAlloc(ptr, actual_size); return ptr; } @@ -96,7 +96,7 @@ void * operator new[](std::size_t size, const std::nothrow_t &) noexcept { AllocationTrace trace; std::size_t actual_size = Memory::trackMemory(size, trace); - void * ptr = Memory::newNoExept(size); + void * ptr = Memory::newNoExcept(size); trace.onAlloc(ptr, actual_size); return ptr; } @@ -105,7 +105,7 @@ void * operator new(std::size_t size, std::align_val_t align, const std::nothrow { AllocationTrace trace; std::size_t actual_size = Memory::trackMemory(size, trace, align); - void * ptr = Memory::newNoExept(size, align); + void * ptr = Memory::newNoExcept(size, align); trace.onAlloc(ptr, actual_size); return ptr; } @@ -114,7 +114,7 @@ void * operator new[](std::size_t size, std::align_val_t align, const std::nothr { AllocationTrace trace; std::size_t actual_size = Memory::trackMemory(size, trace, align); - void * ptr = Memory::newNoExept(size, align); + void * ptr = Memory::newNoExcept(size, align); trace.onAlloc(ptr, actual_size); return ptr; } diff --git a/src/Common/tests/gtest_rw_lock.cpp b/src/Common/tests/gtest_rw_lock.cpp index d8c6e9cb99d..9b0c9aeafbe 100644 --- a/src/Common/tests/gtest_rw_lock.cpp +++ b/src/Common/tests/gtest_rw_lock.cpp @@ -166,7 +166,7 @@ TEST(Common, RWLockRecursive) auto lock2 = fifo_lock->getLock(RWLockImpl::Read, "q2"); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD /// It throws LOGICAL_ERROR EXPECT_ANY_THROW({fifo_lock->getLock(RWLockImpl::Write, "q2");}); #endif diff --git a/src/Common/tests/gtest_shell_command.cpp b/src/Common/tests/gtest_shell_command.cpp index d6d0a544e9b..0ea96da9da2 100644 --- a/src/Common/tests/gtest_shell_command.cpp +++ b/src/Common/tests/gtest_shell_command.cpp @@ -54,16 +54,3 @@ TEST(ShellCommand, ExecuteWithInput) EXPECT_EQ(res, "Hello, world!\n"); } - -TEST(ShellCommand, AutoWait) -{ - // hunting: - for (int i = 0; i < 1000; ++i) - { - auto command = ShellCommand::execute("echo " + std::to_string(i)); - //command->wait(); // now automatic - } - - // std::cerr << "inspect me: ps auxwwf\n"; - // std::this_thread::sleep_for(std::chrono::seconds(100)); -} diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index 6e23a56ef97..35a23fc9e78 100644 --- 
a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -55,6 +55,7 @@ struct Settings; M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) \ M(UInt64, raft_limits_reconnect_limit, 50, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0) \ M(Bool, async_replication, false, "Enable async replication. All write and read guarantees are preserved while better performance is achieved. Settings is disabled by default to not break backwards compatibility.", 0) \ + M(Bool, experimental_use_rocksdb, false, "Use rocksdb as backend storage", 0) \ M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \ M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \ M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \ diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 8de9f8dfa1c..d0ecbe744a0 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/src/Coordination/KeeperConstants.cpp b/src/Coordination/KeeperConstants.cpp index 7589e3393be..daa6a4bcb3f 100644 --- a/src/Coordination/KeeperConstants.cpp +++ b/src/Coordination/KeeperConstants.cpp @@ -183,8 +183,6 @@ M(ReadBufferFromS3InitMicroseconds) \ M(ReadBufferFromS3Bytes) \ M(ReadBufferFromS3RequestsErrors) \ - M(ReadBufferFromS3ResetSessions) \ - M(ReadBufferFromS3PreservedSessions) \ \ M(WriteBufferFromS3Microseconds) \ M(WriteBufferFromS3Bytes) \ diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index a36a074ce89..1f66882ecad 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -3,19 +3,29 @@ #include +#include #include +#include +#include +#include +#include +#include +#include #include #include #include -#include #include -#include -#include -#include #include #include +#include "config.h" +#if USE_ROCKSDB +#include +#include +#include +#endif + namespace DB { @@ -23,6 +33,8 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; +extern const int LOGICAL_ERROR; +extern const int ROCKSDB_ERROR; } @@ -40,6 +52,95 @@ KeeperContext::KeeperContext(bool standalone_keeper_, CoordinationSettingsPtr co system_nodes_with_data[keeper_api_version_path] = toString(static_cast(KeeperApiVersion::WITH_MULTI_READ)); } +#if USE_ROCKSDB +using RocksDBOptions = std::unordered_map; + +static RocksDBOptions getOptionsFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & path) +{ + RocksDBOptions options; + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(path, keys); + + for (const auto & key : keys) + { + const String key_path = path + "." 
+ key; + options[key] = config.getString(key_path); + } + + return options; +} + +static rocksdb::Options getRocksDBOptionsFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + rocksdb::Status status; + rocksdb::Options base; + + base.create_if_missing = true; + base.compression = rocksdb::CompressionType::kZSTD; + base.statistics = rocksdb::CreateDBStatistics(); + /// It is too verbose by default, and in fact we don't care about rocksdb logs at all. + base.info_log_level = rocksdb::ERROR_LEVEL; + + rocksdb::Options merged = base; + rocksdb::BlockBasedTableOptions table_options; + + if (config.has("keeper_server.rocksdb.options")) + { + auto config_options = getOptionsFromConfig(config, "keeper_server.rocksdb.options"); + status = rocksdb::GetDBOptionsFromMap(merged, config_options, &merged); + if (!status.ok()) + { + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.options' : {}", + status.ToString()); + } + } + if (config.has("rocksdb.column_family_options")) + { + auto column_family_options = getOptionsFromConfig(config, "rocksdb.column_family_options"); + status = rocksdb::GetColumnFamilyOptionsFromMap(merged, column_family_options, &merged); + if (!status.ok()) + { + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.column_family_options' at: {}", status.ToString()); + } + } + if (config.has("rocksdb.block_based_table_options")) + { + auto block_based_table_options = getOptionsFromConfig(config, "rocksdb.block_based_table_options"); + status = rocksdb::GetBlockBasedTableOptionsFromMap(table_options, block_based_table_options, &table_options); + if (!status.ok()) + { + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Fail to merge rocksdb options from 'rocksdb.block_based_table_options' at: {}", status.ToString()); + } + } + + merged.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options)); + return merged; +} +#endif + +KeeperContext::Storage KeeperContext::getRocksDBPathFromConfig(const Poco::Util::AbstractConfiguration & config) const +{ + const auto create_local_disk = [](const auto & path) + { + if (fs::exists(path)) + fs::remove_all(path); + fs::create_directories(path); + + return std::make_shared("LocalRocksDBDisk", path); + }; + if (config.has("keeper_server.rocksdb_path")) + return create_local_disk(config.getString("keeper_server.rocksdb_path")); + + if (config.has("keeper_server.storage_path")) + return create_local_disk(std::filesystem::path{config.getString("keeper_server.storage_path")} / "rocksdb"); + + if (standalone_keeper) + return create_local_disk(std::filesystem::path{config.getString("path", KEEPER_DEFAULT_PATH)} / "rocksdb"); + else + return create_local_disk(std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/rocksdb"); +} + void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config, KeeperDispatcher * dispatcher_) { dispatcher = dispatcher_; @@ -58,6 +159,14 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config, initializeFeatureFlags(config); initializeDisks(config); + + #if USE_ROCKSDB + if (config.getBool("keeper_server.coordination_settings.experimental_use_rocksdb", false)) + { + rocksdb_options = std::make_shared(getRocksDBOptionsFromConfig(config)); + digest_enabled = false; /// TODO: support digest + } + #endif } namespace @@ -93,6 +202,8 @@ void KeeperContext::initializeDisks(const Poco::Util::AbstractConfiguration & co { disk_selector->initialize(config, 
"storage_configuration.disks", Context::getGlobalContextInstance(), diskValidator); + rocksdb_storage = getRocksDBPathFromConfig(config); + log_storage = getLogsPathFromConfig(config); if (config.has("keeper_server.latest_log_storage_disk")) @@ -261,6 +372,37 @@ void KeeperContext::dumpConfiguration(WriteBufferFromOwnString & buf) const } } + +void KeeperContext::setRocksDBDisk(DiskPtr disk) +{ + rocksdb_storage = std::move(disk); +} + +DiskPtr KeeperContext::getTemporaryRocksDBDisk() const +{ + DiskPtr rocksdb_disk = getDisk(rocksdb_storage); + if (!rocksdb_disk) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "rocksdb storage is not initialized"); + } + auto uuid_str = formatUUID(UUIDHelpers::generateV4()); + String path_to_create = "rocks_" + std::string(uuid_str.data(), uuid_str.size()); + rocksdb_disk->createDirectory(path_to_create); + return std::make_shared("LocalTmpRocksDBDisk", fullPath(rocksdb_disk, path_to_create)); +} + +void KeeperContext::setRocksDBOptions(std::shared_ptr rocksdb_options_) +{ + if (rocksdb_options_ != nullptr) + rocksdb_options = rocksdb_options_; + else + { + #if USE_ROCKSDB + rocksdb_options = std::make_shared(getRocksDBOptionsFromConfig(Poco::Util::JSONConfiguration())); + #endif + } +} + KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const { const auto create_local_disk = [](const auto & path) diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index e283e65dffa..38013725f56 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -6,6 +6,11 @@ #include #include +namespace rocksdb +{ +struct Options; +} + namespace DB { @@ -62,6 +67,12 @@ public: constexpr KeeperDispatcher * getDispatcher() const { return dispatcher; } + void setRocksDBDisk(DiskPtr disk); + DiskPtr getTemporaryRocksDBDisk() const; + + void setRocksDBOptions(std::shared_ptr rocksdb_options_ = nullptr); + std::shared_ptr getRocksDBOptions() const { return rocksdb_options; } + UInt64 getKeeperMemorySoftLimit() const { return memory_soft_limit; } void updateKeeperMemorySoftLimit(const Poco::Util::AbstractConfiguration & config); @@ -90,6 +101,7 @@ private: void initializeFeatureFlags(const Poco::Util::AbstractConfiguration & config); void initializeDisks(const Poco::Util::AbstractConfiguration & config); + Storage getRocksDBPathFromConfig(const Poco::Util::AbstractConfiguration & config) const; Storage getLogsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const; Storage getSnapshotsPathFromConfig(const Poco::Util::AbstractConfiguration & config) const; Storage getStatePathFromConfig(const Poco::Util::AbstractConfiguration & config) const; @@ -111,12 +123,15 @@ private: std::shared_ptr disk_selector; + Storage rocksdb_storage; Storage log_storage; Storage latest_log_storage; Storage snapshot_storage; Storage latest_snapshot_storage; Storage state_file_storage; + std::shared_ptr rocksdb_options; + std::vector old_log_disk_names; std::vector old_snapshot_disk_names; diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index fc72e5fab59..4a350077596 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -5,8 +5,8 @@ #include #include -#include "Common/ZooKeeper/IKeeper.h" -#include "Common/ZooKeeper/ZooKeeperCommon.h" +#include +#include #include #include #include @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -116,13 +117,13 @@ void 
KeeperDispatcher::requestThread() RaftAppendResult prev_result = nullptr; /// Requests from previous iteration. We store them to be able /// to send errors to the client. - KeeperStorage::RequestsForSessions prev_batch; + KeeperStorageBase::RequestsForSessions prev_batch; const auto & shutdown_called = keeper_context->isShutdownCalled(); while (!shutdown_called) { - KeeperStorage::RequestForSession request; + KeeperStorageBase::RequestForSession request; auto coordination_settings = configuration_and_settings->coordination_settings; uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds(); @@ -152,7 +153,7 @@ void KeeperDispatcher::requestThread() continue; } - KeeperStorage::RequestsForSessions current_batch; + KeeperStorageBase::RequestsForSessions current_batch; size_t current_batch_bytes_size = 0; bool has_read_request = false; @@ -310,7 +311,7 @@ void KeeperDispatcher::responseThread() const auto & shutdown_called = keeper_context->isShutdownCalled(); while (!shutdown_called) { - KeeperStorage::ResponseForSession response_for_session; + KeeperStorageBase::ResponseForSession response_for_session; uint64_t max_wait = configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds(); @@ -401,7 +402,7 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ return false; } - KeeperStorage::RequestForSession request_info; + KeeperStorageBase::RequestForSession request_info; request_info.request = request; using namespace std::chrono; request_info.time = duration_cast(system_clock::now().time_since_epoch()).count(); @@ -447,7 +448,7 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf snapshots_queue, keeper_context, snapshot_s3, - [this](uint64_t /*log_idx*/, const KeeperStorage::RequestForSession & request_for_session) + [this](uint64_t /*log_idx*/, const KeeperStorageBase::RequestForSession & request_for_session) { { /// check if we have queue of read requests depending on this request to be committed @@ -539,7 +540,7 @@ void KeeperDispatcher::shutdown() update_configuration_thread.join(); } - KeeperStorage::RequestForSession request_for_session; + KeeperStorageBase::RequestForSession request_for_session; /// Set session expired for all pending requests while (requests_queue && requests_queue->tryPop(request_for_session)) @@ -550,7 +551,7 @@ void KeeperDispatcher::shutdown() setResponse(request_for_session.session_id, response); } - KeeperStorage::RequestsForSessions close_requests; + KeeperStorageBase::RequestsForSessions close_requests; { /// Clear all registered sessions std::lock_guard lock(session_to_response_callback_mutex); @@ -564,7 +565,7 @@ void KeeperDispatcher::shutdown() auto request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close); request->xid = Coordination::CLOSE_XID; using namespace std::chrono; - KeeperStorage::RequestForSession request_info + KeeperStorageBase::RequestForSession request_info { .session_id = session, .time = duration_cast(system_clock::now().time_since_epoch()).count(), @@ -662,7 +663,7 @@ void KeeperDispatcher::sessionCleanerTask() auto request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Close); request->xid = Coordination::CLOSE_XID; using namespace std::chrono; - KeeperStorage::RequestForSession request_info + KeeperStorageBase::RequestForSession request_info { .session_id = dead_session, .time = duration_cast(system_clock::now().time_since_epoch()).count(), @@ -710,16 
+711,16 @@ void KeeperDispatcher::finishSession(int64_t session_id) } } -void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSessions & requests_for_sessions, Coordination::Error error) +void KeeperDispatcher::addErrorResponses(const KeeperStorageBase::RequestsForSessions & requests_for_sessions, Coordination::Error error) { for (const auto & request_for_session : requests_for_sessions) { - KeeperStorage::ResponsesForSessions responses; + KeeperStorageBase::ResponsesForSessions responses; auto response = request_for_session.request->makeResponse(); response->xid = request_for_session.request->xid; response->zxid = 0; response->error = error; - if (!responses_queue.push(DB::KeeperStorage::ResponseForSession{request_for_session.session_id, response})) + if (!responses_queue.push(DB::KeeperStorageBase::ResponseForSession{request_for_session.session_id, response})) throw Exception(ErrorCodes::SYSTEM_ERROR, "Could not push error response xid {} zxid {} error message {} to responses queue", response->xid, @@ -729,7 +730,7 @@ void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSession } nuraft::ptr KeeperDispatcher::forceWaitAndProcessResult( - RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success) + RaftAppendResult & result, KeeperStorageBase::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success) { if (!result->has_result()) result->get(); @@ -754,7 +755,7 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) { /// New session id allocation is a special request, because we cannot process it in normal /// way: get request -> put to raft -> set response for registered callback. - KeeperStorage::RequestForSession request_info; + KeeperStorageBase::RequestForSession request_info; std::shared_ptr request = std::make_shared(); /// Internal session id. It's a temporary number which is unique for each client on this server /// but can be same on different servers. diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index a487b886d98..651fd0e1c88 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -27,7 +26,7 @@ using ZooKeeperResponseCallback = std::function; + using RequestsQueue = ConcurrentBoundedQueue; using SessionToResponseCallback = std::unordered_map; using ClusterUpdateQueue = ConcurrentBoundedQueue; @@ -96,18 +95,18 @@ private: /// Add error responses for requests to responses queue. /// Clears requests. - void addErrorResponses(const KeeperStorage::RequestsForSessions & requests_for_sessions, Coordination::Error error); + void addErrorResponses(const KeeperStorageBase::RequestsForSessions & requests_for_sessions, Coordination::Error error); /// Forcefully wait for result and sets errors if something when wrong. 
/// Clears both arguments nuraft::ptr forceWaitAndProcessResult( - RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success); + RaftAppendResult & result, KeeperStorageBase::RequestsForSessions & requests_for_sessions, bool clear_requests_on_success); public: std::mutex read_request_queue_mutex; /// queue of read requests that can be processed after a request with specific session ID and XID is committed - std::unordered_map> read_request_queue; + std::unordered_map> read_request_queue; /// Just allocate some objects, real initialization is done by `intialize method` KeeperDispatcher(); @@ -193,7 +192,7 @@ public: Keeper4LWInfo getKeeper4LWInfo() const; - const KeeperStateMachine & getStateMachine() const + const IKeeperStateMachine & getStateMachine() const { return *server->getKeeperStateMachine(); } diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 09a1132e96d..d40e5ef2e50 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -3,9 +3,7 @@ #include "config.h" -#include -#include -#include +#include #include #include #include @@ -30,6 +28,10 @@ #include #include +#include +#include +#include + #pragma clang diagnostic ignored "-Wdeprecated-declarations" #include @@ -121,7 +123,7 @@ KeeperServer::KeeperServer( SnapshotsQueue & snapshots_queue_, KeeperContextPtr keeper_context_, KeeperSnapshotManagerS3 & snapshot_manager_s3, - KeeperStateMachine::CommitCallback commit_callback) + IKeeperStateMachine::CommitCallback commit_callback) : server_id(configuration_and_settings_->server_id) , log(getLogger("KeeperServer")) , is_recovering(config.getBool("keeper_server.force_recovery", false)) @@ -132,13 +134,28 @@ KeeperServer::KeeperServer( if (keeper_context->getCoordinationSettings()->quorum_reads) LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower."); - state_machine = nuraft::cs_new( - responses_queue_, - snapshots_queue_, - keeper_context, - config.getBool("keeper_server.upload_snapshot_on_exit", false) ? &snapshot_manager_s3 : nullptr, - commit_callback, - checkAndGetSuperdigest(configuration_and_settings_->super_digest)); +#if USE_ROCKSDB + const auto & coordination_settings = keeper_context->getCoordinationSettings(); + if (coordination_settings->experimental_use_rocksdb) + { + state_machine = nuraft::cs_new>( + responses_queue_, + snapshots_queue_, + keeper_context, + config.getBool("keeper_server.upload_snapshot_on_exit", false) ? &snapshot_manager_s3 : nullptr, + commit_callback, + checkAndGetSuperdigest(configuration_and_settings_->super_digest)); + LOG_WARNING(log, "Use RocksDB as Keeper backend storage."); + } + else +#endif + state_machine = nuraft::cs_new>( + responses_queue_, + snapshots_queue_, + keeper_context, + config.getBool("keeper_server.upload_snapshot_on_exit", false) ? 
&snapshot_manager_s3 : nullptr, + commit_callback, + checkAndGetSuperdigest(configuration_and_settings_->super_digest)); state_manager = nuraft::cs_new( server_id, @@ -520,7 +537,7 @@ namespace { // Serialize the request for the log entry -nuraft::ptr getZooKeeperLogEntry(const KeeperStorage::RequestForSession & request_for_session) +nuraft::ptr getZooKeeperLogEntry(const KeeperStorageBase::RequestForSession & request_for_session) { DB::WriteBufferFromNuraftBuffer write_buf; DB::writeIntBinary(request_for_session.session_id, write_buf); @@ -528,7 +545,7 @@ nuraft::ptr getZooKeeperLogEntry(const KeeperStorage::RequestFor DB::writeIntBinary(request_for_session.time, write_buf); /// we fill with dummy values to eliminate unnecessary copy later on when we will write correct values DB::writeIntBinary(static_cast(0), write_buf); /// zxid - DB::writeIntBinary(KeeperStorage::DigestVersion::NO_DIGEST, write_buf); /// digest version or NO_DIGEST flag + DB::writeIntBinary(KeeperStorageBase::DigestVersion::NO_DIGEST, write_buf); /// digest version or NO_DIGEST flag DB::writeIntBinary(static_cast(0), write_buf); /// digest value /// if new fields are added, update KeeperStateMachine::ZooKeeperLogSerializationVersion along with parseRequest function and PreAppendLog callback handler return write_buf.getBuffer(); @@ -536,7 +553,7 @@ nuraft::ptr getZooKeeperLogEntry(const KeeperStorage::RequestFor } -void KeeperServer::putLocalReadRequest(const KeeperStorage::RequestForSession & request_for_session) +void KeeperServer::putLocalReadRequest(const KeeperStorageBase::RequestForSession & request_for_session) { if (!request_for_session.request->isReadRequest()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot process non-read request locally"); @@ -544,7 +561,7 @@ void KeeperServer::putLocalReadRequest(const KeeperStorage::RequestForSession & state_machine->processReadRequest(request_for_session); } -RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions) +RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorageBase::RequestsForSessions & requests_for_sessions) { std::vector> entries; entries.reserve(requests_for_sessions.size()); @@ -787,7 +804,7 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ auto entry_buf = entry->get_buf_ptr(); - KeeperStateMachine::ZooKeeperLogSerializationVersion serialization_version; + IKeeperStateMachine::ZooKeeperLogSerializationVersion serialization_version; auto request_for_session = state_machine->parseRequest(*entry_buf, /*final=*/false, &serialization_version); request_for_session->zxid = next_zxid; if (!state_machine->preprocess(*request_for_session)) @@ -797,10 +814,10 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ /// older versions of Keeper can send logs that are missing some fields size_t bytes_missing = 0; - if (serialization_version < KeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME) + if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME) bytes_missing += sizeof(request_for_session->time); - if (serialization_version < KeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_ZXID_DIGEST) + if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_ZXID_DIGEST) bytes_missing += sizeof(request_for_session->zxid) + sizeof(request_for_session->digest->version) + sizeof(request_for_session->digest->value); if (bytes_missing != 0) @@ -814,19 +831,19 @@ 
nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ size_t write_buffer_header_size = sizeof(request_for_session->zxid) + sizeof(request_for_session->digest->version) + sizeof(request_for_session->digest->value); - if (serialization_version < KeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME) + if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME) write_buffer_header_size += sizeof(request_for_session->time); auto * buffer_start = reinterpret_cast(entry_buf->data_begin() + entry_buf->size() - write_buffer_header_size); WriteBufferFromPointer write_buf(buffer_start, write_buffer_header_size); - if (serialization_version < KeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME) + if (serialization_version < IKeeperStateMachine::ZooKeeperLogSerializationVersion::WITH_TIME) writeIntBinary(request_for_session->time, write_buf); writeIntBinary(request_for_session->zxid, write_buf); writeIntBinary(request_for_session->digest->version, write_buf); - if (request_for_session->digest->version != KeeperStorage::NO_DIGEST) + if (request_for_session->digest->version != KeeperStorageBase::NO_DIGEST) writeIntBinary(request_for_session->digest->value, write_buf); write_buf.finalize(); diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 5e45a552cba..f082b5d377e 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -17,12 +16,15 @@ namespace DB using RaftAppendResult = nuraft::ptr>>; +struct KeeperConfigurationAndSettings; +using KeeperConfigurationAndSettingsPtr = std::shared_ptr; + class KeeperServer { private: const int server_id; - nuraft::ptr state_machine; + nuraft::ptr state_machine; nuraft::ptr state_manager; @@ -77,26 +79,26 @@ public: SnapshotsQueue & snapshots_queue_, KeeperContextPtr keeper_context_, KeeperSnapshotManagerS3 & snapshot_manager_s3, - KeeperStateMachine::CommitCallback commit_callback); + IKeeperStateMachine::CommitCallback commit_callback); /// Load state machine from the latest snapshot and load log storage. Start NuRaft with required settings. void startup(const Poco::Util::AbstractConfiguration & config, bool enable_ipv6 = true); /// Put local read request and execute in state machine directly and response into /// responses queue - void putLocalReadRequest(const KeeperStorage::RequestForSession & request); + void putLocalReadRequest(const KeeperStorageBase::RequestForSession & request); bool isRecovering() const { return is_recovering; } bool reconfigEnabled() const { return enable_reconfiguration; } /// Put batch of requests into Raft and get result of put. Responses will be set separately into /// responses_queue. 
- RaftAppendResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests); + RaftAppendResult putRequestBatch(const KeeperStorageBase::RequestsForSessions & requests); /// Return set of the non-active sessions std::vector getDeadSessions(); - nuraft::ptr getKeeperStateMachine() const { return state_machine; } + nuraft::ptr getKeeperStateMachine() const { return state_machine; } void forceRecovery(); diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 8bf42b434cd..3f5ac055470 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -66,7 +66,8 @@ namespace return base; } - void writeNode(const KeeperStorage::Node & node, SnapshotVersion version, WriteBuffer & out) + template + void writeNode(const Node & node, SnapshotVersion version, WriteBuffer & out) { writeBinary(node.getData(), out); @@ -86,7 +87,7 @@ namespace writeBinary(node.aversion, out); writeBinary(node.ephemeralOwner(), out); if (version < SnapshotVersion::V6) - writeBinary(static_cast(node.data_size), out); + writeBinary(static_cast(node.getData().size()), out); writeBinary(node.numChildren(), out); writeBinary(node.pzxid, out); @@ -96,7 +97,8 @@ namespace writeBinary(node.sizeInBytes(), out); } - void readNode(KeeperStorage::Node & node, ReadBuffer & in, SnapshotVersion version, ACLMap & acl_map) + template + void readNode(Node & node, ReadBuffer & in, SnapshotVersion version, ACLMap & acl_map) { readVarUInt(node.data_size, in); if (node.data_size != 0) @@ -195,7 +197,8 @@ namespace } } -void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, WriteBuffer & out, KeeperContextPtr keeper_context) +template +void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, WriteBuffer & out, KeeperContextPtr keeper_context) { writeBinary(static_cast(snapshot.version), out); serializeSnapshotMetadata(snapshot.snapshot_meta, out); @@ -205,11 +208,11 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr writeBinary(snapshot.zxid, out); if (keeper_context->digestEnabled()) { - writeBinary(static_cast(KeeperStorage::CURRENT_DIGEST_VERSION), out); + writeBinary(static_cast(Storage::CURRENT_DIGEST_VERSION), out); writeBinary(snapshot.nodes_digest, out); } else - writeBinary(static_cast(KeeperStorage::NO_DIGEST), out); + writeBinary(static_cast(Storage::NO_DIGEST), out); } writeBinary(snapshot.session_id, out); @@ -255,7 +258,6 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr /// slightly bigger than required. 
if (node.mzxid > snapshot.zxid) break; - writeBinary(path, out); writeNode(node, snapshot.version, out); @@ -282,7 +284,7 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr writeBinary(session_id, out); writeBinary(timeout, out); - KeeperStorage::AuthIDs ids; + KeeperStorageBase::AuthIDs ids; if (snapshot.session_and_auth.contains(session_id)) ids = snapshot.session_and_auth.at(session_id); @@ -303,7 +305,8 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr } } -void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context) +template +void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context) { uint8_t version; readBinary(version, in); @@ -312,7 +315,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported snapshot version {}", version); deserialization_result.snapshot_meta = deserializeSnapshotMetadata(in); - KeeperStorage & storage = *deserialization_result.storage; + Storage & storage = *deserialization_result.storage; bool recalculate_digest = keeper_context->digestEnabled(); if (version >= SnapshotVersion::V5) @@ -320,11 +323,11 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial readBinary(storage.zxid, in); uint8_t digest_version; readBinary(digest_version, in); - if (digest_version != KeeperStorage::DigestVersion::NO_DIGEST) + if (digest_version != Storage::DigestVersion::NO_DIGEST) { uint64_t nodes_digest; readBinary(nodes_digest, in); - if (digest_version == KeeperStorage::CURRENT_DIGEST_VERSION) + if (digest_version == Storage::CURRENT_DIGEST_VERSION) { storage.nodes_digest = nodes_digest; recalculate_digest = false; @@ -374,8 +377,8 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial size_t snapshot_container_size; readBinary(snapshot_container_size, in); - - storage.container.reserve(snapshot_container_size); + if constexpr (!use_rocksdb) + storage.container.reserve(snapshot_container_size); if (recalculate_digest) storage.nodes_digest = 0; @@ -389,7 +392,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial in.readStrict(path_data.get(), path_size); std::string_view path{path_data.get(), path_size}; - KeeperStorage::Node node{}; + typename Storage::Node node{}; readNode(node, in, current_version, storage.acl_map); using enum Coordination::PathMatchResult; @@ -421,7 +424,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial if (keeper_context->ignoreSystemPathOnStartup() || keeper_context->getServerState() != KeeperContext::Phase::INIT) { LOG_ERROR(getLogger("KeeperSnapshotManager"), "{}. 
Ignoring it", get_error_msg()); - node = KeeperStorage::Node{}; + node = typename Storage::Node{}; } else throw Exception( @@ -433,8 +436,9 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial } auto ephemeral_owner = node.ephemeralOwner(); - if (!node.isEphemeral() && node.numChildren() > 0) - node.getChildren().reserve(node.numChildren()); + if constexpr (!use_rocksdb) + if (!node.isEphemeral() && node.numChildren() > 0) + node.getChildren().reserve(node.numChildren()); if (ephemeral_owner != 0) storage.ephemerals[node.ephemeralOwner()].insert(std::string{path}); @@ -447,36 +451,38 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial LOG_TRACE(getLogger("KeeperSnapshotManager"), "Building structure for children nodes"); - for (const auto & itr : storage.container) + if constexpr (!use_rocksdb) { - if (itr.key != "/") + for (const auto & itr : storage.container) { - auto parent_path = parentNodePath(itr.key); - storage.container.updateValue( - parent_path, [path = itr.key](KeeperStorage::Node & value) { value.addChild(getBaseNodeName(path)); }); - } - } - - for (const auto & itr : storage.container) - { - if (itr.key != "/") - { - if (itr.value.numChildren() != static_cast(itr.value.getChildren().size())) + if (itr.key != "/") { + auto parent_path = parentNodePath(itr.key); + storage.container.updateValue( + parent_path, [path = itr.key](typename Storage::Node & value) { value.addChild(getBaseNodeName(path)); }); + } + } + + for (const auto & itr : storage.container) + { + if (itr.key != "/") + { + if (itr.value.numChildren() != static_cast(itr.value.getChildren().size())) + { #ifdef NDEBUG - /// TODO (alesapin) remove this, it should be always CORRUPTED_DATA. - LOG_ERROR(getLogger("KeeperSnapshotManager"), "Children counter in stat.numChildren {}" - " is different from actual children size {} for node {}", itr.value.numChildren(), itr.value.getChildren().size(), itr.key); + /// TODO (alesapin) remove this, it should be always CORRUPTED_DATA. 
+ LOG_ERROR(getLogger("KeeperSnapshotManager"), "Children counter in stat.numChildren {}" + " is different from actual children size {} for node {}", itr.value.numChildren(), itr.value.getChildren().size(), itr.key); #else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Children counter in stat.numChildren {}" - " is different from actual children size {} for node {}", - itr.value.numChildren(), itr.value.getChildren().size(), itr.key); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Children counter in stat.numChildren {}" + " is different from actual children size {} for node {}", + itr.value.numChildren(), itr.value.getChildren().size(), itr.key); #endif + } } } } - size_t active_sessions_size; readBinary(active_sessions_size, in); @@ -493,14 +499,14 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial size_t session_auths_size; readBinary(session_auths_size, in); - KeeperStorage::AuthIDs ids; + typename Storage::AuthIDs ids; size_t session_auth_counter = 0; while (session_auth_counter < session_auths_size) { String scheme, id; readBinary(scheme, in); readBinary(id, in); - ids.emplace_back(KeeperStorage::AuthID{scheme, id}); + ids.emplace_back(typename Storage::AuthID{scheme, id}); session_auth_counter++; } @@ -523,7 +529,8 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial } } -KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t up_to_log_idx_, const ClusterConfigPtr & cluster_config_) +template +KeeperStorageSnapshot::KeeperStorageSnapshot(Storage * storage_, uint64_t up_to_log_idx_, const ClusterConfigPtr & cluster_config_) : storage(storage_) , snapshot_meta(std::make_shared(up_to_log_idx_, 0, std::make_shared())) , session_id(storage->session_id_counter) @@ -540,8 +547,9 @@ KeeperStorageSnapshot::KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t session_and_auth = storage->session_and_auth; } -KeeperStorageSnapshot::KeeperStorageSnapshot( - KeeperStorage * storage_, const SnapshotMetadataPtr & snapshot_meta_, const ClusterConfigPtr & cluster_config_) +template +KeeperStorageSnapshot::KeeperStorageSnapshot( + Storage * storage_, const SnapshotMetadataPtr & snapshot_meta_, const ClusterConfigPtr & cluster_config_) : storage(storage_) , snapshot_meta(snapshot_meta_) , session_id(storage->session_id_counter) @@ -558,12 +566,14 @@ KeeperStorageSnapshot::KeeperStorageSnapshot( session_and_auth = storage->session_and_auth; } -KeeperStorageSnapshot::~KeeperStorageSnapshot() +template +KeeperStorageSnapshot::~KeeperStorageSnapshot() { storage->disableSnapshotMode(); } -KeeperSnapshotManager::KeeperSnapshotManager( +template +KeeperSnapshotManager::KeeperSnapshotManager( size_t snapshots_to_keep_, const KeeperContextPtr & keeper_context_, bool compress_snapshots_zstd_, @@ -651,7 +661,8 @@ KeeperSnapshotManager::KeeperSnapshotManager( moveSnapshotsIfNeeded(); } -SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx) +template +SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx) { ReadBufferFromNuraftBuffer reader(buffer); @@ -680,7 +691,8 @@ SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotBufferToDisk(nuraft: return snapshot_file_info; } -nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBufferFromDisk() +template +nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBufferFromDisk() { while (!existing_snapshots.empty()) { @@ -701,7 
+713,8 @@ nuraft::ptr KeeperSnapshotManager::deserializeLatestSnapshotBuff return nullptr; } -nuraft::ptr KeeperSnapshotManager::deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const +template +nuraft::ptr KeeperSnapshotManager::deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const { const auto & [snapshot_path, snapshot_disk, size] = *existing_snapshots.at(up_to_log_idx); WriteBufferFromNuraftBuffer writer; @@ -710,7 +723,8 @@ nuraft::ptr KeeperSnapshotManager::deserializeSnapshotBufferFrom return writer.getBuffer(); } -nuraft::ptr KeeperSnapshotManager::serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const +template +nuraft::ptr KeeperSnapshotManager::serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const { std::unique_ptr writer = std::make_unique(); auto * buffer_raw_ptr = writer.get(); @@ -720,13 +734,13 @@ nuraft::ptr KeeperSnapshotManager::serializeSnapshotToBuffer(con else compressed_writer = std::make_unique(*writer); - KeeperStorageSnapshot::serialize(snapshot, *compressed_writer, keeper_context); + KeeperStorageSnapshot::serialize(snapshot, *compressed_writer, keeper_context); compressed_writer->finalize(); return buffer_raw_ptr->getBuffer(); } - -bool KeeperSnapshotManager::isZstdCompressed(nuraft::ptr buffer) +template +bool KeeperSnapshotManager::isZstdCompressed(nuraft::ptr buffer) { static constexpr unsigned char ZSTD_COMPRESSED_MAGIC[4] = {0x28, 0xB5, 0x2F, 0xFD}; @@ -737,7 +751,8 @@ bool KeeperSnapshotManager::isZstdCompressed(nuraft::ptr buffer) return memcmp(magic_from_buffer, ZSTD_COMPRESSED_MAGIC, 4) == 0; } -SnapshotDeserializationResult KeeperSnapshotManager::deserializeSnapshotFromBuffer(nuraft::ptr buffer) const +template +SnapshotDeserializationResult KeeperSnapshotManager::deserializeSnapshotFromBuffer(nuraft::ptr buffer) const { bool is_zstd_compressed = isZstdCompressed(buffer); @@ -749,14 +764,15 @@ SnapshotDeserializationResult KeeperSnapshotManager::deserializeSnapshotFromBuff else compressed_reader = std::make_unique(*reader); - SnapshotDeserializationResult result; - result.storage = std::make_unique(storage_tick_time, superdigest, keeper_context, /* initialize_system_nodes */ false); - KeeperStorageSnapshot::deserialize(result, *compressed_reader, keeper_context); + SnapshotDeserializationResult result; + result.storage = std::make_unique(storage_tick_time, superdigest, keeper_context, /* initialize_system_nodes */ false); + KeeperStorageSnapshot::deserialize(result, *compressed_reader, keeper_context); result.storage->initializeSystemNodes(); return result; } -SnapshotDeserializationResult KeeperSnapshotManager::restoreFromLatestSnapshot() +template +SnapshotDeserializationResult KeeperSnapshotManager::restoreFromLatestSnapshot() { if (existing_snapshots.empty()) return {}; @@ -767,23 +783,27 @@ SnapshotDeserializationResult KeeperSnapshotManager::restoreFromLatestSnapshot() return deserializeSnapshotFromBuffer(buffer); } -DiskPtr KeeperSnapshotManager::getDisk() const +template +DiskPtr KeeperSnapshotManager::getDisk() const { return keeper_context->getSnapshotDisk(); } -DiskPtr KeeperSnapshotManager::getLatestSnapshotDisk() const +template +DiskPtr KeeperSnapshotManager::getLatestSnapshotDisk() const { return keeper_context->getLatestSnapshotDisk(); } -void KeeperSnapshotManager::removeOutdatedSnapshotsIfNeeded() +template +void KeeperSnapshotManager::removeOutdatedSnapshotsIfNeeded() { while (existing_snapshots.size() > snapshots_to_keep) removeSnapshot(existing_snapshots.begin()->first); } -void 
KeeperSnapshotManager::moveSnapshotsIfNeeded() +template +void KeeperSnapshotManager::moveSnapshotsIfNeeded() { /// move snapshots to correct disks @@ -813,7 +833,8 @@ void KeeperSnapshotManager::moveSnapshotsIfNeeded() } -void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx) +template +void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx) { auto itr = existing_snapshots.find(log_idx); if (itr == existing_snapshots.end()) @@ -823,7 +844,8 @@ void KeeperSnapshotManager::removeSnapshot(uint64_t log_idx) existing_snapshots.erase(itr); } -SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot) +template +SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot) { auto up_to_log_idx = snapshot.snapshot_meta->get_last_log_idx(); auto snapshot_file_name = getSnapshotFileName(up_to_log_idx, compress_snapshots_zstd); @@ -842,7 +864,7 @@ SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperS else compressed_writer = std::make_unique(*writer); - KeeperStorageSnapshot::serialize(snapshot, *compressed_writer, keeper_context); + KeeperStorageSnapshot::serialize(snapshot, *compressed_writer, keeper_context); compressed_writer->finalize(); compressed_writer->sync(); @@ -864,14 +886,16 @@ SnapshotFileInfoPtr KeeperSnapshotManager::serializeSnapshotToDisk(const KeeperS return snapshot_file_info; } -size_t KeeperSnapshotManager::getLatestSnapshotIndex() const +template +size_t KeeperSnapshotManager::getLatestSnapshotIndex() const { if (!existing_snapshots.empty()) return existing_snapshots.rbegin()->first; return 0; } -SnapshotFileInfoPtr KeeperSnapshotManager::getLatestSnapshotInfo() const +template +SnapshotFileInfoPtr KeeperSnapshotManager::getLatestSnapshotInfo() const { if (!existing_snapshots.empty()) { @@ -890,4 +914,10 @@ SnapshotFileInfoPtr KeeperSnapshotManager::getLatestSnapshotInfo() const return nullptr; } +template struct KeeperStorageSnapshot; +template class KeeperSnapshotManager; +#if USE_ROCKSDB +template struct KeeperStorageSnapshot; +template class KeeperSnapshotManager; +#endif } diff --git a/src/Coordination/KeeperSnapshotManager.h b/src/Coordination/KeeperSnapshotManager.h index 36bf6c8775c..be9ce386ab1 100644 --- a/src/Coordination/KeeperSnapshotManager.h +++ b/src/Coordination/KeeperSnapshotManager.h @@ -34,10 +34,11 @@ enum SnapshotVersion : uint8_t static constexpr auto CURRENT_SNAPSHOT_VERSION = SnapshotVersion::V6; /// What is stored in binary snapshot +template struct SnapshotDeserializationResult { /// Storage - KeeperStoragePtr storage; + std::unique_ptr storage; /// Snapshot metadata (up_to_log_idx and so on) SnapshotMetadataPtr snapshot_meta; /// Cluster config @@ -52,21 +53,31 @@ struct SnapshotDeserializationResult /// /// This representation of snapshot have to be serialized into NuRaft /// buffer and send over network or saved to file. 
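// A minimal sketch (hypothetical MemoryStorage/RocksStorage types, not the real Keeper
// containers) of the compile-time dispatch pattern the templated snapshot code above relies on:
// the `use_rocksdb` flag is derived from the storage type, and in-memory-only steps such as
// pre-sizing the container are compiled out for the RocksDB backend via `if constexpr`.
#include <cstddef>
#include <type_traits>

struct MemoryStorage { void reserve(std::size_t) { /* pre-size the hash table */ } };
struct RocksStorage  { /* no reserve(): entries are written straight to the DB */ };

template <typename Storage>
struct SnapshotLoader
{
    static constexpr bool use_rocksdb = std::is_same_v<Storage, RocksStorage>;

    void load(Storage & storage, std::size_t expected_nodes)
    {
        if constexpr (!use_rocksdb)
            storage.reserve(expected_nodes);  // only meaningful for the in-memory container
        // ... read nodes from the snapshot and insert them into `storage` ...
    }
};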
+template struct KeeperStorageSnapshot { +#if USE_ROCKSDB + static constexpr bool use_rocksdb = std::is_same_v; +#else + static constexpr bool use_rocksdb = false; +#endif + public: - KeeperStorageSnapshot(KeeperStorage * storage_, uint64_t up_to_log_idx_, const ClusterConfigPtr & cluster_config_ = nullptr); + KeeperStorageSnapshot(Storage * storage_, uint64_t up_to_log_idx_, const ClusterConfigPtr & cluster_config_ = nullptr); KeeperStorageSnapshot( - KeeperStorage * storage_, const SnapshotMetadataPtr & snapshot_meta_, const ClusterConfigPtr & cluster_config_ = nullptr); + Storage * storage_, const SnapshotMetadataPtr & snapshot_meta_, const ClusterConfigPtr & cluster_config_ = nullptr); + + KeeperStorageSnapshot(const KeeperStorageSnapshot&) = delete; + KeeperStorageSnapshot(KeeperStorageSnapshot&&) = default; ~KeeperStorageSnapshot(); - static void serialize(const KeeperStorageSnapshot & snapshot, WriteBuffer & out, KeeperContextPtr keeper_context); + static void serialize(const KeeperStorageSnapshot & snapshot, WriteBuffer & out, KeeperContextPtr keeper_context); - static void deserialize(SnapshotDeserializationResult & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context); + static void deserialize(SnapshotDeserializationResult & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context); - KeeperStorage * storage; + Storage * storage; SnapshotVersion version = CURRENT_SNAPSHOT_VERSION; /// Snapshot metadata @@ -77,11 +88,11 @@ public: /// so we have for loop for (i = 0; i < snapshot_container_size; ++i) { doSmth(begin + i); } size_t snapshot_container_size; /// Iterator to the start of the storage - KeeperStorage::Container::const_iterator begin; + Storage::Container::const_iterator begin; /// Active sessions and their timeouts SessionAndTimeout session_and_timeout; /// Sessions credentials - KeeperStorage::SessionAndAuth session_and_auth; + Storage::SessionAndAuth session_and_auth; /// ACLs cache for better performance. Without we cannot deserialize storage. std::unordered_map acl_map; /// Cluster config from snapshot, can be empty @@ -105,14 +116,16 @@ struct SnapshotFileInfo }; using SnapshotFileInfoPtr = std::shared_ptr; - -using KeeperStorageSnapshotPtr = std::shared_ptr; -using CreateSnapshotCallback = std::function(KeeperStorageSnapshotPtr &&, bool)>; - -using SnapshotMetaAndStorage = std::pair; +#if USE_ROCKSDB +using KeeperStorageSnapshotPtr = std::variant>, std::shared_ptr>>; +#else +using KeeperStorageSnapshotPtr = std::variant>>; +#endif +using CreateSnapshotCallback = std::function; /// Class responsible for snapshots serialization and deserialization. Each snapshot /// has it's path on disk and log index. 
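// The snapshot manager is templated on the storage type as well, while its member definitions
// stay in the .cpp file. A compressed sketch of that idiom with hypothetical names: the
// translation unit ends with explicit instantiations, one per backend, the RocksDB one
// guarded by the build flag, which keeps the template bodies out of the header.
struct MemoryStorage;
struct RocksStorage;

// manager.h
template <typename Storage>
class Manager
{
public:
    void restore();  // declared here, defined in the .cpp
};

// manager.cpp
template <typename Storage>
void Manager<Storage>::restore() { /* ... */ }

template class Manager<MemoryStorage>;  // explicit instantiations make the definitions
#if USE_ROCKSDB                         // available to other translation units
template class Manager<RocksStorage>;
#endif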
+template class KeeperSnapshotManager { public: @@ -124,18 +137,18 @@ public: size_t storage_tick_time_ = 500); /// Restore storage from latest available snapshot - SnapshotDeserializationResult restoreFromLatestSnapshot(); + SnapshotDeserializationResult restoreFromLatestSnapshot(); /// Compress snapshot and serialize it to buffer - nuraft::ptr serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const; + nuraft::ptr serializeSnapshotToBuffer(const KeeperStorageSnapshot & snapshot) const; /// Serialize already compressed snapshot to disk (return path) SnapshotFileInfoPtr serializeSnapshotBufferToDisk(nuraft::buffer & buffer, uint64_t up_to_log_idx); /// Serialize snapshot directly to disk - SnapshotFileInfoPtr serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot); + SnapshotFileInfoPtr serializeSnapshotToDisk(const KeeperStorageSnapshot & snapshot); - SnapshotDeserializationResult deserializeSnapshotFromBuffer(nuraft::ptr buffer) const; + SnapshotDeserializationResult deserializeSnapshotFromBuffer(nuraft::ptr buffer) const; /// Deserialize snapshot with log index up_to_log_idx from disk into compressed nuraft buffer. nuraft::ptr deserializeSnapshotBufferFromDisk(uint64_t up_to_log_idx) const; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 3d3d862e1dd..a4aa1b18746 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -43,7 +44,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -KeeperStateMachine::KeeperStateMachine( +IKeeperStateMachine::IKeeperStateMachine( ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, const KeeperContextPtr & keeper_context_, @@ -51,12 +52,6 @@ KeeperStateMachine::KeeperStateMachine( CommitCallback commit_callback_, const std::string & superdigest_) : commit_callback(commit_callback_) - , snapshot_manager( - keeper_context_->getCoordinationSettings()->snapshots_to_keep, - keeper_context_, - keeper_context_->getCoordinationSettings()->compress_snapshots_with_zstd_format, - superdigest_, - keeper_context_->getCoordinationSettings()->dead_session_check_period_ms.totalMilliseconds()) , responses_queue(responses_queue_) , snapshots_queue(snapshots_queue_) , min_request_size_to_cache(keeper_context_->getCoordinationSettings()->min_request_size_for_cache) @@ -67,6 +62,32 @@ KeeperStateMachine::KeeperStateMachine( { } +template +KeeperStateMachine::KeeperStateMachine( + ResponsesQueue & responses_queue_, + SnapshotsQueue & snapshots_queue_, + // const CoordinationSettingsPtr & coordination_settings_, + const KeeperContextPtr & keeper_context_, + KeeperSnapshotManagerS3 * snapshot_manager_s3_, + IKeeperStateMachine::CommitCallback commit_callback_, + const std::string & superdigest_) + : IKeeperStateMachine( + responses_queue_, + snapshots_queue_, + /// coordination_settings_, + keeper_context_, + snapshot_manager_s3_, + commit_callback_, + superdigest_), + snapshot_manager( + keeper_context_->getCoordinationSettings()->snapshots_to_keep, + keeper_context_, + keeper_context_->getCoordinationSettings()->compress_snapshots_with_zstd_format, + superdigest_, + keeper_context_->getCoordinationSettings()->dead_session_check_period_ms.totalMilliseconds()) +{ +} + namespace { @@ -77,7 +98,8 @@ bool isLocalDisk(const IDisk & disk) } -void KeeperStateMachine::init() +template +void KeeperStateMachine::init() { /// Do everything without mutexes, no other 
threads exist. LOG_DEBUG(log, "Totally have {} snapshots", snapshot_manager.totalSnapshots()); @@ -122,7 +144,7 @@ void KeeperStateMachine::init() LOG_DEBUG(log, "No existing snapshots, last committed log index {}", last_committed_idx); if (!storage) - storage = std::make_unique( + storage = std::make_unique( keeper_context->getCoordinationSettings()->dead_session_check_period_ms.totalMilliseconds(), superdigest, keeper_context); } @@ -130,13 +152,13 @@ namespace { void assertDigest( - const KeeperStorage::Digest & expected, - const KeeperStorage::Digest & actual, + const KeeperStorageBase::Digest & expected, + const KeeperStorageBase::Digest & actual, const Coordination::ZooKeeperRequest & request, uint64_t log_idx, bool committing) { - if (!KeeperStorage::checkDigest(expected, actual)) + if (!KeeperStorageBase::checkDigest(expected, actual)) { LOG_FATAL( getLogger("KeeperStateMachine"), @@ -169,7 +191,8 @@ struct TSA_SCOPED_LOCKABLE LockGuardWithStats final } -nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data) +template +nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nuraft::buffer & data) { auto result = nuraft::buffer::alloc(sizeof(log_idx)); nuraft::buffer_serializer ss(result); @@ -190,10 +213,10 @@ nuraft::ptr KeeperStateMachine::pre_commit(uint64_t log_idx, nur return result; } -std::shared_ptr KeeperStateMachine::parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version) +std::shared_ptr IKeeperStateMachine::parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version) { ReadBufferFromNuraftBuffer buffer(data); - auto request_for_session = std::make_shared(); + auto request_for_session = std::make_shared(); readIntBinary(request_for_session->session_id, buffer); int32_t length; @@ -266,7 +289,7 @@ std::shared_ptr KeeperStateMachine::parseReque request_for_session->digest.emplace(); readIntBinary(request_for_session->digest->version, buffer); - if (request_for_session->digest->version != KeeperStorage::DigestVersion::NO_DIGEST || !buffer.eof()) + if (request_for_session->digest->version != KeeperStorageBase::DigestVersion::NO_DIGEST || !buffer.eof()) readIntBinary(request_for_session->digest->value, buffer); } @@ -282,7 +305,8 @@ std::shared_ptr KeeperStateMachine::parseReque return request_for_session; } -bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & request_for_session) +template +bool KeeperStateMachine::preprocess(const KeeperStorageBase::RequestForSession & request_for_session) { const auto op_num = request_for_session.request->getOpNum(); if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig) @@ -316,10 +340,11 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req return true; } -void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& request_for_session) +template +void KeeperStateMachine::reconfigure(const KeeperStorageBase::RequestForSession& request_for_session) { LockGuardWithStats lock(storage_and_responses_lock); - KeeperStorage::ResponseForSession response = processReconfiguration(request_for_session); + KeeperStorageBase::ResponseForSession response = processReconfiguration(request_for_session); if (!responses_queue.push(response)) { ProfileEvents::increment(ProfileEvents::KeeperCommitsFailed); @@ -329,8 +354,9 @@ void KeeperStateMachine::reconfigure(const KeeperStorage::RequestForSession& req } } 
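// The state machine follows the same split (names simplified, not the real interfaces):
// storage-independent pieces such as request parsing and cluster-config handling sit in a
// non-template base, and a class template derived from it owns the concrete storage, so the
// Raft layer only ever works with the base type.
#include <cstdint>
#include <memory>

struct IStateMachine
{
    virtual ~IStateMachine() = default;
    virtual void commit(uint64_t log_idx) = 0;          // storage-dependent, overridden below
    void commitConfig(uint64_t /*log_idx*/) { /* storage-independent, lives in the base */ }
};

template <typename Storage>
class StateMachine : public IStateMachine
{
public:
    void commit(uint64_t log_idx) override { storage->applyLog(log_idx); }

private:
    std::unique_ptr<Storage> storage;
};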
-KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration( - const KeeperStorage::RequestForSession & request_for_session) +template +KeeperStorageBase::ResponseForSession KeeperStateMachine::processReconfiguration( + const KeeperStorageBase::RequestForSession & request_for_session) { ProfileEvents::increment(ProfileEvents::KeeperReconfigRequest); @@ -339,7 +365,7 @@ KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration( const int64_t zxid = request_for_session.zxid; using enum Coordination::Error; - auto bad_request = [&](Coordination::Error code = ZBADARGUMENTS) -> KeeperStorage::ResponseForSession + auto bad_request = [&](Coordination::Error code = ZBADARGUMENTS) -> KeeperStorageBase::ResponseForSession { auto res = std::make_shared(); res->xid = request.xid; @@ -396,7 +422,8 @@ KeeperStorage::ResponseForSession KeeperStateMachine::processReconfiguration( return { session_id, std::move(response) }; } -nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data) +template +nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data) { auto request_for_session = parseRequest(data, true); if (!request_for_session->zxid) @@ -407,7 +434,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n if (!keeper_context->localLogsPreprocessed() && !preprocess(*request_for_session)) return nullptr; - auto try_push = [&](const KeeperStorage::ResponseForSession & response) + auto try_push = [&](const KeeperStorageBase::ResponseForSession & response) { if (!responses_queue.push(response)) { @@ -429,7 +456,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n std::shared_ptr response = std::make_shared(); response->internal_id = session_id_request.internal_id; response->server_id = session_id_request.server_id; - KeeperStorage::ResponseForSession response_for_session; + KeeperStorageBase::ResponseForSession response_for_session; response_for_session.session_id = -1; response_for_session.response = response; response_for_session.request = request_for_session->request; @@ -450,7 +477,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n } LockGuardWithStats lock(storage_and_responses_lock); - KeeperStorage::ResponsesForSessions responses_for_sessions + KeeperStorageBase::ResponsesForSessions responses_for_sessions = storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid); for (auto & response_for_session : responses_for_sessions) @@ -481,7 +508,8 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n return nullptr; } -bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) +template +bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) { LOG_DEBUG(log, "Applying snapshot {}", s.get_last_log_idx()); nuraft::ptr latest_snapshot_ptr; @@ -508,7 +536,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) { /// deserialize and apply snapshot to storage LockGuardWithStats lock(storage_and_responses_lock); - SnapshotDeserializationResult snapshot_deserialization_result; + SnapshotDeserializationResult snapshot_deserialization_result; if (latest_snapshot_ptr) snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr); else @@ -529,7 +557,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) } -void KeeperStateMachine::commit_config(const uint64_t log_idx, nuraft::ptr & new_conf) +void IKeeperStateMachine::commit_config(const 
uint64_t log_idx, nuraft::ptr & new_conf) { std::lock_guard lock(cluster_config_lock); auto tmp = new_conf->serialize(); @@ -537,7 +565,7 @@ void KeeperStateMachine::commit_config(const uint64_t log_idx, nuraft::ptrsetLastCommitIndex(log_idx); } -void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data) +void IKeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data) { /// Don't rollback anything until the first commit because nothing was preprocessed if (!keeper_context->localLogsPreprocessed()) @@ -553,7 +581,8 @@ void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data) rollbackRequest(*request_for_session, false); } -void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing) +template +void KeeperStateMachine::rollbackRequest(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing) { if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) return; @@ -562,7 +591,8 @@ void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession storage->rollbackRequest(request_for_session.zxid, allow_missing); } -void KeeperStateMachine::rollbackRequestNoLock(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing) +template +void KeeperStateMachine::rollbackRequestNoLock(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing) { if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) return; @@ -570,14 +600,15 @@ void KeeperStateMachine::rollbackRequestNoLock(const KeeperStorage::RequestForSe storage->rollbackRequest(request_for_session.zxid, allow_missing); } -nuraft::ptr KeeperStateMachine::last_snapshot() +nuraft::ptr IKeeperStateMachine::last_snapshot() { /// Just return the latest snapshot. std::lock_guard lock(snapshots_lock); return latest_snapshot_meta; } -void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_result::handler_type & when_done) +template +void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_result::handler_type & when_done) { LOG_DEBUG(log, "Creating snapshot {}", s.get_last_log_idx()); @@ -586,14 +617,15 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res CreateSnapshotTask snapshot_task; { /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking. 
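
Note on the comment above: the storage lock is held only long enough to switch the container into "snapshot mode"; afterwards the snapshot thread serializes a frozen, consistent view while writers keep going. Below is a minimal copy-on-write sketch of that idea with hypothetical names; the real container does this at node granularity instead of copying the whole map, so this shows only the shape of the mechanism and assumes a single writer.

#include <cstdint>
#include <map>
#include <memory>
#include <string>

class SnapshotableMap
{
public:
    using Map = std::map<std::string, int64_t>;

    /// Called under the short-lived storage lock: freeze the current version.
    std::shared_ptr<const Map> enableSnapshotMode()
    {
        frozen = live;
        return frozen;
    }

    void disableSnapshotMode() { frozen.reset(); }

    /// Writers run after the lock is released (single writer assumed here).
    void set(const std::string & key, int64_t value)
    {
        if (frozen == live)                       /// live view is still shared with the snapshot
            live = std::make_shared<Map>(*live);  /// copy-on-write: the snapshot keeps the old version
        (*live)[key] = value;
    }

    int64_t get(const std::string & key) const { return live->at(key); }

private:
    std::shared_ptr<Map> live = std::make_shared<Map>();
    std::shared_ptr<const Map> frozen;
};

int main()
{
    SnapshotableMap storage;
    storage.set("/a", 1);

    auto snapshot_view = storage.enableSnapshotMode();  /// under the short lock
    storage.set("/a", 2);                               /// writer proceeds on a private copy

    /// The snapshot thread still reads the frozen value 1 while the live value is already 2.
    bool ok = snapshot_view->at("/a") == 1 && storage.get("/a") == 2;
    storage.disableSnapshotMode();
    return ok ? 0 : 1;
}
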
LockGuardWithStats lock(storage_and_responses_lock); - snapshot_task.snapshot = std::make_shared(storage.get(), snapshot_meta_copy, getClusterConfig()); + snapshot_task.snapshot = std::make_shared>(storage.get(), snapshot_meta_copy, getClusterConfig()); } /// create snapshot task for background execution (in snapshot thread) - snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot, bool execute_only_cleanup) + snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot_, bool execute_only_cleanup) { nuraft::ptr exception(nullptr); bool ret = false; + auto && snapshot = std::get>>(std::move(snapshot_)); if (!execute_only_cleanup) { try @@ -682,7 +714,8 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res LOG_WARNING(log, "Cannot push snapshot task into queue"); } -void KeeperStateMachine::save_logical_snp_obj( +template +void KeeperStateMachine::save_logical_snp_obj( nuraft::snapshot & s, uint64_t & obj_id, nuraft::buffer & data, bool /*is_first_obj*/, bool /*is_last_obj*/) { LOG_DEBUG(log, "Saving snapshot {} obj_id {}", s.get_last_log_idx(), obj_id); @@ -747,7 +780,7 @@ static int bufferFromFile(LoggerPtr log, const std::string & path, nuraft::ptr & data_out, bool & is_last_obj) { LOG_DEBUG(log, "Reading snapshot {} obj_id {}", s.get_last_log_idx(), obj_id); @@ -787,7 +820,8 @@ int KeeperStateMachine::read_logical_snp_obj( return 1; } -void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSession & request_for_session) +template +void KeeperStateMachine::processReadRequest(const KeeperStorageBase::RequestForSession & request_for_session) { /// Pure local request, just process it with storage LockGuardWithStats lock(storage_and_responses_lock); @@ -803,103 +837,120 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi } } -void KeeperStateMachine::shutdownStorage() +template +void KeeperStateMachine::shutdownStorage() { LockGuardWithStats lock(storage_and_responses_lock); storage->finalize(); } -std::vector KeeperStateMachine::getDeadSessions() +template +std::vector KeeperStateMachine::getDeadSessions() { LockGuardWithStats lock(storage_and_responses_lock); return storage->getDeadSessions(); } -int64_t KeeperStateMachine::getNextZxid() const +template +int64_t KeeperStateMachine::getNextZxid() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getNextZXID(); } -KeeperStorage::Digest KeeperStateMachine::getNodesDigest() const +template +KeeperStorageBase::Digest KeeperStateMachine::getNodesDigest() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getNodesDigest(false); } -uint64_t KeeperStateMachine::getLastProcessedZxid() const +template +uint64_t KeeperStateMachine::getLastProcessedZxid() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getZXID(); } -uint64_t KeeperStateMachine::getNodesCount() const +template +uint64_t KeeperStateMachine::getNodesCount() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getNodesCount(); } -uint64_t KeeperStateMachine::getTotalWatchesCount() const +template +uint64_t KeeperStateMachine::getTotalWatchesCount() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getTotalWatchesCount(); } -uint64_t KeeperStateMachine::getWatchedPathsCount() const +template +uint64_t KeeperStateMachine::getWatchedPathsCount() const { LockGuardWithStats lock(storage_and_responses_lock); return 
storage->getWatchedPathsCount(); } -uint64_t KeeperStateMachine::getSessionsWithWatchesCount() const +template +uint64_t KeeperStateMachine::getSessionsWithWatchesCount() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getSessionsWithWatchesCount(); } -uint64_t KeeperStateMachine::getTotalEphemeralNodesCount() const +template +uint64_t KeeperStateMachine::getTotalEphemeralNodesCount() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getTotalEphemeralNodesCount(); } -uint64_t KeeperStateMachine::getSessionWithEphemeralNodesCount() const +template +uint64_t KeeperStateMachine::getSessionWithEphemeralNodesCount() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getSessionWithEphemeralNodesCount(); } -void KeeperStateMachine::dumpWatches(WriteBufferFromOwnString & buf) const +template +void KeeperStateMachine::dumpWatches(WriteBufferFromOwnString & buf) const { LockGuardWithStats lock(storage_and_responses_lock); storage->dumpWatches(buf); } -void KeeperStateMachine::dumpWatchesByPath(WriteBufferFromOwnString & buf) const +template +void KeeperStateMachine::dumpWatchesByPath(WriteBufferFromOwnString & buf) const { LockGuardWithStats lock(storage_and_responses_lock); storage->dumpWatchesByPath(buf); } -void KeeperStateMachine::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const +template +void KeeperStateMachine::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const { LockGuardWithStats lock(storage_and_responses_lock); storage->dumpSessionsAndEphemerals(buf); } -uint64_t KeeperStateMachine::getApproximateDataSize() const +template +uint64_t KeeperStateMachine::getApproximateDataSize() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getApproximateDataSize(); } -uint64_t KeeperStateMachine::getKeyArenaSize() const +template +uint64_t KeeperStateMachine::getKeyArenaSize() const { LockGuardWithStats lock(storage_and_responses_lock); return storage->getArenaDataSize(); } -uint64_t KeeperStateMachine::getLatestSnapshotSize() const +template +uint64_t KeeperStateMachine::getLatestSnapshotSize() const { auto snapshot_info = [&] { @@ -922,7 +973,7 @@ uint64_t KeeperStateMachine::getLatestSnapshotSize() const return size; } -ClusterConfigPtr KeeperStateMachine::getClusterConfig() const +ClusterConfigPtr IKeeperStateMachine::getClusterConfig() const { std::lock_guard lock(cluster_config_lock); if (cluster_config) @@ -934,11 +985,18 @@ ClusterConfigPtr KeeperStateMachine::getClusterConfig() const return nullptr; } -void KeeperStateMachine::recalculateStorageStats() +template +void KeeperStateMachine::recalculateStorageStats() { LockGuardWithStats lock(storage_and_responses_lock); LOG_INFO(log, "Recalculating storage stats"); storage->recalculateStats(); LOG_INFO(log, "Done recalculating storage stats"); } + +template class KeeperStateMachine; +#if USE_ROCKSDB +template class KeeperStateMachine; +#endif + } diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 7ea14aa2d30..6afd413d782 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -12,26 +11,24 @@ namespace DB { -using ResponsesQueue = ConcurrentBoundedQueue; +using ResponsesQueue = ConcurrentBoundedQueue; using SnapshotsQueue = ConcurrentBoundedQueue; -/// ClickHouse Keeper state machine. Wrapper for KeeperStorage. 
-/// Responsible for entries commit, snapshots creation and so on. -class KeeperStateMachine : public nuraft::state_machine +class IKeeperStateMachine : public nuraft::state_machine { public: - using CommitCallback = std::function; + using CommitCallback = std::function; - KeeperStateMachine( + IKeeperStateMachine( ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, const KeeperContextPtr & keeper_context_, KeeperSnapshotManagerS3 * snapshot_manager_s3_, - CommitCallback commit_callback_ = {}, - const std::string & superdigest_ = ""); + CommitCallback commit_callback_, + const std::string & superdigest_); /// Read state from the latest snapshot - void init(); + virtual void init() = 0; enum ZooKeeperLogSerializationVersion { @@ -48,89 +45,66 @@ public: /// /// final - whether it's the final time we will fetch the request so we can safely remove it from cache /// serialization_version - information about which fields were parsed from the buffer so we can modify the buffer accordingly - std::shared_ptr parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version = nullptr); + std::shared_ptr parseRequest(nuraft::buffer & data, bool final, ZooKeeperLogSerializationVersion * serialization_version = nullptr); - bool preprocess(const KeeperStorage::RequestForSession & request_for_session); + virtual bool preprocess(const KeeperStorageBase::RequestForSession & request_for_session) = 0; - nuraft::ptr pre_commit(uint64_t log_idx, nuraft::buffer & data) override; - - nuraft::ptr commit(const uint64_t log_idx, nuraft::buffer & data) override; /// NOLINT - - /// Save new cluster config to our snapshot (copy of the config stored in StateManager) void commit_config(const uint64_t log_idx, nuraft::ptr & new_conf) override; /// NOLINT void rollback(uint64_t log_idx, nuraft::buffer & data) override; // allow_missing - whether the transaction we want to rollback can be missing from storage // (can happen in case of exception during preprocessing) - void rollbackRequest(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing); - - void rollbackRequestNoLock( - const KeeperStorage::RequestForSession & request_for_session, - bool allow_missing) TSA_NO_THREAD_SAFETY_ANALYSIS; + virtual void rollbackRequest(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing) = 0; uint64_t last_commit_index() override { return keeper_context->lastCommittedIndex(); } - /// Apply preliminarily saved (save_logical_snp_obj) snapshot to our state. - bool apply_snapshot(nuraft::snapshot & s) override; - nuraft::ptr last_snapshot() override; /// Create new snapshot from current state. - void create_snapshot(nuraft::snapshot & s, nuraft::async_result::handler_type & when_done) override; + void create_snapshot(nuraft::snapshot & s, nuraft::async_result::handler_type & when_done) override = 0; /// Save snapshot which was send by leader to us. After that we will apply it in apply_snapshot. - void save_logical_snp_obj(nuraft::snapshot & s, uint64_t & obj_id, nuraft::buffer & data, bool is_first_obj, bool is_last_obj) override; + void save_logical_snp_obj(nuraft::snapshot & s, uint64_t & obj_id, nuraft::buffer & data, bool is_first_obj, bool is_last_obj) override = 0; - /// Better name is `serialize snapshot` -- save existing snapshot (created by create_snapshot) into - /// in-memory buffer data_out. 
int read_logical_snp_obj( nuraft::snapshot & s, void *& user_snp_ctx, uint64_t obj_id, nuraft::ptr & data_out, bool & is_last_obj) override; - // This should be used only for tests or keeper-data-dumper because it violates - // TSA -- we can't acquire the lock outside of this class or return a storage under lock - // in a reasonable way. - KeeperStorage & getStorageUnsafe() TSA_NO_THREAD_SAFETY_ANALYSIS - { - return *storage; - } - - void shutdownStorage(); + virtual void shutdownStorage() = 0; ClusterConfigPtr getClusterConfig() const; - /// Process local read request - void processReadRequest(const KeeperStorage::RequestForSession & request_for_session); + virtual void processReadRequest(const KeeperStorageBase::RequestForSession & request_for_session) = 0; - std::vector getDeadSessions(); + virtual std::vector getDeadSessions() = 0; - int64_t getNextZxid() const; + virtual int64_t getNextZxid() const = 0; - KeeperStorage::Digest getNodesDigest() const; + virtual KeeperStorageBase::Digest getNodesDigest() const = 0; /// Introspection functions for 4lw commands - uint64_t getLastProcessedZxid() const; + virtual uint64_t getLastProcessedZxid() const = 0; - uint64_t getNodesCount() const; - uint64_t getTotalWatchesCount() const; - uint64_t getWatchedPathsCount() const; - uint64_t getSessionsWithWatchesCount() const; + virtual uint64_t getNodesCount() const = 0; + virtual uint64_t getTotalWatchesCount() const = 0; + virtual uint64_t getWatchedPathsCount() const = 0; + virtual uint64_t getSessionsWithWatchesCount() const = 0; - void dumpWatches(WriteBufferFromOwnString & buf) const; - void dumpWatchesByPath(WriteBufferFromOwnString & buf) const; - void dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const; + virtual void dumpWatches(WriteBufferFromOwnString & buf) const = 0; + virtual void dumpWatchesByPath(WriteBufferFromOwnString & buf) const = 0; + virtual void dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const = 0; - uint64_t getSessionWithEphemeralNodesCount() const; - uint64_t getTotalEphemeralNodesCount() const; - uint64_t getApproximateDataSize() const; - uint64_t getKeyArenaSize() const; - uint64_t getLatestSnapshotSize() const; + virtual uint64_t getSessionWithEphemeralNodesCount() const = 0; + virtual uint64_t getTotalEphemeralNodesCount() const = 0; + virtual uint64_t getApproximateDataSize() const = 0; + virtual uint64_t getKeyArenaSize() const = 0; + virtual uint64_t getLatestSnapshotSize() const = 0; - void recalculateStorageStats(); + virtual void recalculateStorageStats() = 0; - void reconfigure(const KeeperStorage::RequestForSession& request_for_session); + virtual void reconfigure(const KeeperStorageBase::RequestForSession& request_for_session) = 0; -private: +protected: CommitCallback commit_callback; /// In our state machine we always have a single snapshot which is stored /// in memory in compressed (serialized) format. @@ -138,12 +112,9 @@ private: std::shared_ptr latest_snapshot_info; nuraft::ptr latest_snapshot_buf = nullptr; - /// Main state machine logic - KeeperStoragePtr storage TSA_PT_GUARDED_BY(storage_and_responses_lock); + CoordinationSettingsPtr coordination_settings; /// Save/Load and Serialize/Deserialize logic for snapshots. - KeeperSnapshotManager snapshot_manager; - /// Put processed responses into this queue ResponsesQueue & responses_queue; @@ -160,7 +131,7 @@ private: /// for request. 
mutable std::mutex storage_and_responses_lock; - std::unordered_map>> parsed_request_cache; + std::unordered_map>> parsed_request_cache; uint64_t min_request_size_to_cache{0}; /// we only need to protect the access to the map itself /// requests can be modified from anywhere without lock because a single request @@ -182,7 +153,104 @@ private: KeeperSnapshotManagerS3 * snapshot_manager_s3; - KeeperStorage::ResponseForSession processReconfiguration(const KeeperStorage::RequestForSession & request_for_session) - TSA_REQUIRES(storage_and_responses_lock); + virtual KeeperStorageBase::ResponseForSession processReconfiguration( + const KeeperStorageBase::RequestForSession& request_for_session) + TSA_REQUIRES(storage_and_responses_lock) = 0; + +}; + +/// ClickHouse Keeper state machine. Wrapper for KeeperStorage. +/// Responsible for entries commit, snapshots creation and so on. +template +class KeeperStateMachine : public IKeeperStateMachine +{ +public: + /// using CommitCallback = std::function; + + KeeperStateMachine( + ResponsesQueue & responses_queue_, + SnapshotsQueue & snapshots_queue_, + /// const CoordinationSettingsPtr & coordination_settings_, + const KeeperContextPtr & keeper_context_, + KeeperSnapshotManagerS3 * snapshot_manager_s3_, + CommitCallback commit_callback_ = {}, + const std::string & superdigest_ = ""); + + /// Read state from the latest snapshot + void init() override; + + bool preprocess(const KeeperStorageBase::RequestForSession & request_for_session) override; + + nuraft::ptr pre_commit(uint64_t log_idx, nuraft::buffer & data) override; + + nuraft::ptr commit(const uint64_t log_idx, nuraft::buffer & data) override; /// NOLINT + + // allow_missing - whether the transaction we want to rollback can be missing from storage + // (can happen in case of exception during preprocessing) + void rollbackRequest(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing) override; + + void rollbackRequestNoLock( + const KeeperStorageBase::RequestForSession & request_for_session, + bool allow_missing) TSA_NO_THREAD_SAFETY_ANALYSIS; + + /// Apply preliminarily saved (save_logical_snp_obj) snapshot to our state. + bool apply_snapshot(nuraft::snapshot & s) override; + + /// Create new snapshot from current state. + void create_snapshot(nuraft::snapshot & s, nuraft::async_result::handler_type & when_done) override; + + /// Save snapshot which was send by leader to us. After that we will apply it in apply_snapshot. + void save_logical_snp_obj(nuraft::snapshot & s, uint64_t & obj_id, nuraft::buffer & data, bool is_first_obj, bool is_last_obj) override; + + // This should be used only for tests or keeper-data-dumper because it violates + // TSA -- we can't acquire the lock outside of this class or return a storage under lock + // in a reasonable way. 
+ Storage & getStorageUnsafe() TSA_NO_THREAD_SAFETY_ANALYSIS + { + return *storage; + } + + void shutdownStorage() override; + + /// Process local read request + void processReadRequest(const KeeperStorageBase::RequestForSession & request_for_session) override; + + std::vector getDeadSessions() override; + + int64_t getNextZxid() const override; + + KeeperStorageBase::Digest getNodesDigest() const override; + + /// Introspection functions for 4lw commands + uint64_t getLastProcessedZxid() const override; + + uint64_t getNodesCount() const override; + uint64_t getTotalWatchesCount() const override; + uint64_t getWatchedPathsCount() const override; + uint64_t getSessionsWithWatchesCount() const override; + + void dumpWatches(WriteBufferFromOwnString & buf) const override; + void dumpWatchesByPath(WriteBufferFromOwnString & buf) const override; + void dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const override; + + uint64_t getSessionWithEphemeralNodesCount() const override; + uint64_t getTotalEphemeralNodesCount() const override; + uint64_t getApproximateDataSize() const override; + uint64_t getKeyArenaSize() const override; + uint64_t getLatestSnapshotSize() const override; + + void recalculateStorageStats() override; + + void reconfigure(const KeeperStorageBase::RequestForSession& request_for_session) override; + +private: + /// Main state machine logic + std::unique_ptr storage; //TSA_PT_GUARDED_BY(storage_and_responses_lock); + + /// Save/Load and Serialize/Deserialize logic for snapshots. + KeeperSnapshotManager snapshot_manager; + + KeeperStorageBase::ResponseForSession processReconfiguration(const KeeperStorageBase::RequestForSession & request_for_session) + TSA_REQUIRES(storage_and_responses_lock) override; }; } diff --git a/src/Coordination/KeeperStateManager.h b/src/Coordination/KeeperStateManager.h index 60f6dbe7b62..e5ab5fb4807 100644 --- a/src/Coordination/KeeperStateManager.h +++ b/src/Coordination/KeeperStateManager.h @@ -1,14 +1,13 @@ #pragma once -#include #include #include -#include +#include +#include #include #include #include "Coordination/KeeperStateMachine.h" #include "Coordination/RaftServerConfig.h" -#include namespace DB { diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 1542eb0d71a..acdf209baae 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -20,11 +21,12 @@ #include #include +#include #include #include +#include #include #include -#include #include #include @@ -63,10 +65,11 @@ String getSHA1(const String & userdata) return String{digest_id.begin(), digest_id.end()}; } +template bool fixupACL( const std::vector & request_acls, int64_t session_id, - const KeeperStorage::UncommittedState & uncommitted_state, + const UncommittedState & uncommitted_state, std::vector & result_acls) { if (request_acls.empty()) @@ -79,7 +82,7 @@ bool fixupACL( { uncommitted_state.forEachAuthInSession( session_id, - [&](const KeeperStorage::AuthID & auth_id) + [&](const KeeperStorageBase::AuthID & auth_id) { valid_found = true; Coordination::ACL new_acl = request_acl; @@ -110,10 +113,10 @@ bool fixupACL( return valid_found; } -KeeperStorage::ResponsesForSessions processWatchesImpl( - const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type) +KeeperStorageBase::ResponsesForSessions processWatchesImpl( + const String & path, 
KeeperStorageBase::Watches & watches, KeeperStorageBase::Watches & list_watches, Coordination::Event event_type) { - KeeperStorage::ResponsesForSessions result; + KeeperStorageBase::ResponsesForSessions result; auto watch_it = watches.find(path); if (watch_it != watches.end()) { @@ -124,7 +127,7 @@ KeeperStorage::ResponsesForSessions processWatchesImpl( watch_response->type = event_type; watch_response->state = Coordination::State::CONNECTED; for (auto watcher_session : watch_it->second) - result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_response}); + result.push_back(KeeperStorageBase::ResponseForSession{watcher_session, watch_response}); watches.erase(watch_it); } @@ -160,7 +163,7 @@ KeeperStorage::ResponsesForSessions processWatchesImpl( watch_list_response->state = Coordination::State::CONNECTED; for (auto watcher_session : watch_it->second) - result.push_back(KeeperStorage::ResponseForSession{watcher_session, watch_list_response}); + result.push_back(KeeperStorageBase::ResponseForSession{watcher_session, watch_list_response}); list_watches.erase(watch_it); } @@ -169,7 +172,8 @@ KeeperStorage::ResponsesForSessions processWatchesImpl( } // When this function is updated, update CURRENT_DIGEST_VERSION!! -uint64_t calculateDigest(std::string_view path, const KeeperStorage::Node & node) +template +uint64_t calculateDigest(std::string_view path, const Node & node) { SipHash hash; @@ -204,7 +208,71 @@ uint64_t calculateDigest(std::string_view path, const KeeperStorage::Node & node } -KeeperStorage::Node & KeeperStorage::Node::operator=(const Node & other) +void KeeperRocksNodeInfo::copyStats(const Coordination::Stat & stat) +{ + czxid = stat.czxid; + mzxid = stat.mzxid; + pzxid = stat.pzxid; + + mtime = stat.mtime; + setCtime(stat.ctime); + + version = stat.version; + cversion = stat.cversion; + aversion = stat.aversion; + + if (stat.ephemeralOwner == 0) + { + is_ephemeral_and_ctime.is_ephemeral = false; + setNumChildren(stat.numChildren); + } + else + { + setEphemeralOwner(stat.ephemeralOwner); + } +} + +void KeeperRocksNode::invalidateDigestCache() const +{ + if (serialized) + throw Exception(ErrorCodes::LOGICAL_ERROR, "We modify node after serialized it"); + digest = 0; +} + +UInt64 KeeperRocksNode::getDigest(std::string_view path) const +{ + if (!digest) + digest = calculateDigest(path, *this); + return digest; +} + +String KeeperRocksNode::getEncodedString() +{ + if (serialized) + throw Exception(ErrorCodes::LOGICAL_ERROR, "We modify node after serialized it"); + serialized = true; + + WriteBufferFromOwnString buffer; + const KeeperRocksNodeInfo & node_info = *this; + writePODBinary(node_info, buffer); + writeBinary(getData(), buffer); + return buffer.str(); +} + +void KeeperRocksNode::decodeFromString(const String &buffer_str) +{ + ReadBufferFromOwnString buffer(buffer_str); + KeeperRocksNodeInfo & node_info = *this; + readPODBinary(node_info, buffer); + readVarUInt(data_size, buffer); + if (data_size) + { + data = std::unique_ptr(new char[data_size]); + buffer.readStrict(data.get(), data_size); + } +} + +KeeperMemNode & KeeperMemNode::operator=(const KeeperMemNode & other) { if (this == &other) return *this; @@ -232,12 +300,12 @@ KeeperStorage::Node & KeeperStorage::Node::operator=(const Node & other) return *this; } -KeeperStorage::Node::Node(const Node & other) +KeeperMemNode::KeeperMemNode(const KeeperMemNode & other) { *this = other; } -KeeperStorage::Node & KeeperStorage::Node::operator=(Node && other) noexcept +KeeperMemNode & 
KeeperMemNode::operator=(KeeperMemNode && other) noexcept { if (this == &other) return *this; @@ -264,17 +332,17 @@ KeeperStorage::Node & KeeperStorage::Node::operator=(Node && other) noexcept return *this; } -KeeperStorage::Node::Node(Node && other) noexcept +KeeperMemNode::KeeperMemNode(KeeperMemNode && other) noexcept { *this = std::move(other); } -bool KeeperStorage::Node::empty() const +bool KeeperMemNode::empty() const { return data_size == 0 && mzxid == 0; } -void KeeperStorage::Node::copyStats(const Coordination::Stat & stat) +void KeeperMemNode::copyStats(const Coordination::Stat & stat) { czxid = stat.czxid; mzxid = stat.mzxid; @@ -298,7 +366,7 @@ void KeeperStorage::Node::copyStats(const Coordination::Stat & stat) } } -void KeeperStorage::Node::setResponseStat(Coordination::Stat & response_stat) const +void KeeperMemNode::setResponseStat(Coordination::Stat & response_stat) const { response_stat.czxid = czxid; response_stat.mzxid = mzxid; @@ -314,12 +382,12 @@ void KeeperStorage::Node::setResponseStat(Coordination::Stat & response_stat) co } -uint64_t KeeperStorage::Node::sizeInBytes() const +uint64_t KeeperMemNode::sizeInBytes() const { - return sizeof(Node) + children.size() * sizeof(StringRef) + data_size; + return sizeof(KeeperMemNode) + children.size() * sizeof(StringRef) + data_size; } -void KeeperStorage::Node::setData(const String & new_data) +void KeeperMemNode::setData(const String & new_data) { data_size = static_cast(new_data.size()); if (data_size != 0) @@ -329,22 +397,22 @@ void KeeperStorage::Node::setData(const String & new_data) } } -void KeeperStorage::Node::addChild(StringRef child_path) +void KeeperMemNode::addChild(StringRef child_path) { children.insert(child_path); } -void KeeperStorage::Node::removeChild(StringRef child_path) +void KeeperMemNode::removeChild(StringRef child_path) { children.erase(child_path); } -void KeeperStorage::Node::invalidateDigestCache() const +void KeeperMemNode::invalidateDigestCache() const { cached_digest = 0; } -UInt64 KeeperStorage::Node::getDigest(const std::string_view path) const +UInt64 KeeperMemNode::getDigest(const std::string_view path) const { if (cached_digest == 0) cached_digest = calculateDigest(path, *this); @@ -352,7 +420,7 @@ UInt64 KeeperStorage::Node::getDigest(const std::string_view path) const return cached_digest; }; -void KeeperStorage::Node::shallowCopy(const KeeperStorage::Node & other) +void KeeperMemNode::shallowCopy(const KeeperMemNode & other) { czxid = other.czxid; mzxid = other.mzxid; @@ -379,19 +447,25 @@ void KeeperStorage::Node::shallowCopy(const KeeperStorage::Node & other) cached_digest = other.cached_digest; } -KeeperStorage::KeeperStorage( + +template +KeeperStorage::KeeperStorage( int64_t tick_time_ms, const String & superdigest_, const KeeperContextPtr & keeper_context_, const bool initialize_system_nodes) : session_expiry_queue(tick_time_ms), keeper_context(keeper_context_), superdigest(superdigest_) { + if constexpr (use_rocksdb) + container.initialize(keeper_context); Node root_node; container.insert("/", root_node); - addDigest(root_node, "/"); + if constexpr (!use_rocksdb) + addDigest(root_node, "/"); if (initialize_system_nodes) initializeSystemNodes(); } -void KeeperStorage::initializeSystemNodes() +template +void KeeperStorage::initializeSystemNodes() { if (initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes initialized twice"); @@ -403,21 +477,25 @@ void KeeperStorage::initializeSystemNodes() container.insert(keeper_system_path, system_node); // 
store digest for the empty node because we won't update // its stats - addDigest(system_node, keeper_system_path); + if constexpr (!use_rocksdb) + addDigest(system_node, keeper_system_path); // update root and the digest based on it auto current_root_it = container.find("/"); chassert(current_root_it != container.end()); - removeDigest(current_root_it->value, "/"); + if constexpr (!use_rocksdb) + removeDigest(current_root_it->value, "/"); auto updated_root_it = container.updateValue( "/", [](KeeperStorage::Node & node) { node.increaseNumChildren(); - node.addChild(getBaseNodeName(keeper_system_path)); + if constexpr (!use_rocksdb) + node.addChild(getBaseNodeName(keeper_system_path)); } ); - addDigest(updated_root_it->value, "/"); + if constexpr (!use_rocksdb) + addDigest(updated_root_it->value, "/"); } // insert child system nodes @@ -426,17 +504,22 @@ void KeeperStorage::initializeSystemNodes() chassert(path.starts_with(keeper_system_path)); Node child_system_node; child_system_node.setData(data); - auto [map_key, _] = container.insert(std::string{path}, child_system_node); - /// Take child path from key owned by map. - auto child_path = getBaseNodeName(map_key->getKey()); - container.updateValue( - parentNodePath(StringRef(path)), - [child_path](auto & parent) - { - // don't update stats so digest is okay - parent.addChild(child_path); - } - ); + if constexpr (use_rocksdb) + container.insert(std::string{path}, child_system_node); + else + { + auto [map_key, _] = container.insert(std::string{path}, child_system_node); + /// Take child path from key owned by map. + auto child_path = getBaseNodeName(map_key->getKey()); + container.updateValue( + parentNodePath(StringRef(path)), + [child_path](auto & parent) + { + // don't update stats so digest is okay + parent.addChild(child_path); + } + ); + } } initialized = true; @@ -453,12 +536,13 @@ struct Overloaded : Ts... template Overloaded(Ts...) 
-> Overloaded; -std::shared_ptr KeeperStorage::UncommittedState::tryGetNodeFromStorage(StringRef path) const +template +std::shared_ptr KeeperStorage::UncommittedState::tryGetNodeFromStorage(StringRef path) const { if (auto node_it = storage.container.find(path); node_it != storage.container.end()) { const auto & committed_node = node_it->value; - auto node = std::make_shared(); + auto node = std::make_shared::Node>(); node->shallowCopy(committed_node); return node; } @@ -466,7 +550,8 @@ std::shared_ptr KeeperStorage::UncommittedState::tryGetNode return nullptr; } -void KeeperStorage::UncommittedState::applyDelta(const Delta & delta) +template +void KeeperStorage::UncommittedState::applyDelta(const Delta & delta) { chassert(!delta.path.empty()); if (!nodes.contains(delta.path)) @@ -513,7 +598,8 @@ void KeeperStorage::UncommittedState::applyDelta(const Delta & delta) delta.operation); } -bool KeeperStorage::UncommittedState::hasACL(int64_t session_id, bool is_local, std::function predicate) const +template +bool KeeperStorage::UncommittedState::hasACL(int64_t session_id, bool is_local, std::function predicate) const { const auto check_auth = [&](const auto & auth_ids) { @@ -551,7 +637,8 @@ bool KeeperStorage::UncommittedState::hasACL(int64_t session_id, bool is_local, return check_auth(auth_it->second); } -void KeeperStorage::UncommittedState::addDelta(Delta new_delta) +template +void KeeperStorage::UncommittedState::addDelta(Delta new_delta) { const auto & added_delta = deltas.emplace_back(std::move(new_delta)); @@ -571,13 +658,15 @@ void KeeperStorage::UncommittedState::addDelta(Delta new_delta) } } -void KeeperStorage::UncommittedState::addDeltas(std::vector new_deltas) +template +void KeeperStorage::UncommittedState::addDeltas(std::vector new_deltas) { for (auto & delta : new_deltas) addDelta(std::move(delta)); } -void KeeperStorage::UncommittedState::commit(int64_t commit_zxid) +template +void KeeperStorage::UncommittedState::commit(int64_t commit_zxid) { chassert(deltas.empty() || deltas.front().zxid >= commit_zxid); @@ -637,7 +726,8 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid) } } -void KeeperStorage::UncommittedState::rollback(int64_t rollback_zxid) +template +void KeeperStorage::UncommittedState::rollback(int64_t rollback_zxid) { // we can only rollback the last zxid (if there is any) // if there is a delta with a larger zxid, we have invalid state @@ -733,7 +823,8 @@ void KeeperStorage::UncommittedState::rollback(int64_t rollback_zxid) } } -std::shared_ptr KeeperStorage::UncommittedState::getNode(StringRef path) const +template +std::shared_ptr KeeperStorage::UncommittedState::getNode(StringRef path) const { if (auto node_it = nodes.find(path.toView()); node_it != nodes.end()) return node_it->second.node; @@ -741,7 +832,8 @@ std::shared_ptr KeeperStorage::UncommittedState::getNode(St return tryGetNodeFromStorage(path); } -Coordination::ACLs KeeperStorage::UncommittedState::getACLs(StringRef path) const +template +Coordination::ACLs KeeperStorage::UncommittedState::getACLs(StringRef path) const { if (auto node_it = nodes.find(path.toView()); node_it != nodes.end()) return node_it->second.acls; @@ -753,7 +845,8 @@ Coordination::ACLs KeeperStorage::UncommittedState::getACLs(StringRef path) cons return storage.acl_map.convertNumber(node_it->value.acl_id); } -void KeeperStorage::UncommittedState::forEachAuthInSession(int64_t session_id, std::function func) const +template +void KeeperStorage::UncommittedState::forEachAuthInSession(int64_t session_id, 
std::function func) const { const auto call_for_each_auth = [&func](const auto & auth_ids) { @@ -792,7 +885,8 @@ namespace } -void KeeperStorage::applyUncommittedState(KeeperStorage & other, int64_t last_log_idx) +template +void KeeperStorage::applyUncommittedState(KeeperStorage & other, int64_t last_log_idx) { std::unordered_set zxids_to_apply; for (const auto & transaction : uncommitted_transactions) @@ -818,7 +912,8 @@ void KeeperStorage::applyUncommittedState(KeeperStorage & other, int64_t last_lo } } -Coordination::Error KeeperStorage::commit(int64_t commit_zxid) +template +Coordination::Error KeeperStorage::commit(int64_t commit_zxid) { // Deltas are added with increasing ZXIDs // If there are no deltas for the commit_zxid (e.g. read requests), we instantly return @@ -832,7 +927,7 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid) auto result = std::visit( [&, &path = delta.path](DeltaType & operation) -> Coordination::Error { - if constexpr (std::same_as) + if constexpr (std::same_as) { if (!createNode( path, @@ -843,7 +938,7 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid) return Coordination::Error::ZOK; } - else if constexpr (std::same_as) + else if constexpr (std::same_as) { auto node_it = container.find(path); if (node_it == container.end()) @@ -852,20 +947,22 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid) if (operation.version != -1 && operation.version != node_it->value.version) onStorageInconsistency(); - removeDigest(node_it->value, path); + if constexpr (!use_rocksdb) + removeDigest(node_it->value, path); auto updated_node = container.updateValue(path, operation.update_fn); - addDigest(updated_node->value, path); + if constexpr (!use_rocksdb) + addDigest(updated_node->value, path); return Coordination::Error::ZOK; } - else if constexpr (std::same_as) + else if constexpr (std::same_as) { if (!removeNode(path, operation.version)) onStorageInconsistency(); return Coordination::Error::ZOK; } - else if constexpr (std::same_as) + else if constexpr (std::same_as) { auto node_it = container.find(path); if (node_it == container.end()) @@ -879,18 +976,18 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid) uint64_t acl_id = acl_map.convertACLs(operation.acls); acl_map.addUsage(acl_id); - container.updateValue(path, [acl_id](KeeperStorage::Node & node) { node.acl_id = acl_id; }); + container.updateValue(path, [acl_id](Node & node) { node.acl_id = acl_id; }); return Coordination::Error::ZOK; } - else if constexpr (std::same_as) + else if constexpr (std::same_as) return operation.error; - else if constexpr (std::same_as) + else if constexpr (std::same_as) { finish_subdelta = true; return Coordination::Error::ZOK; } - else if constexpr (std::same_as) + else if constexpr (std::same_as) { session_and_auth[operation.session_id].emplace_back(std::move(operation.auth_id)); return Coordination::Error::ZOK; @@ -917,7 +1014,8 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid) return Coordination::Error::ZOK; } -bool KeeperStorage::createNode( +template +bool KeeperStorage::createNode( const std::string & path, String data, const Coordination::Stat & stat, @@ -935,7 +1033,7 @@ bool KeeperStorage::createNode( if (container.contains(path)) return false; - KeeperStorage::Node created_node; + Node created_node; uint64_t acl_id = acl_map.convertACLs(node_acls); acl_map.addUsage(acl_id); @@ -943,23 +1041,31 @@ bool KeeperStorage::createNode( created_node.acl_id = acl_id; created_node.copyStats(stat); 
created_node.setData(data); - auto [map_key, _] = container.insert(path, created_node); - /// Take child path from key owned by map. - auto child_path = getBaseNodeName(map_key->getKey()); - container.updateValue( - parent_path, - [child_path](KeeperStorage::Node & parent) - { - parent.addChild(child_path); - chassert(parent.numChildren() == static_cast(parent.getChildren().size())); - } - ); + if constexpr (use_rocksdb) + { + container.insert(path, created_node); + } + else + { + auto [map_key, _] = container.insert(path, created_node); + /// Take child path from key owned by map. + auto child_path = getBaseNodeName(map_key->getKey()); + container.updateValue( + parent_path, + [child_path](KeeperMemNode & parent) + { + parent.addChild(child_path); + chassert(parent.numChildren() == static_cast(parent.getChildren().size())); + } + ); - addDigest(map_key->getMapped()->value, map_key->getKey().toView()); + addDigest(map_key->getMapped()->value, map_key->getKey().toView()); + } return true; }; -bool KeeperStorage::removeNode(const std::string & path, int32_t version) +template +bool KeeperStorage::removeNode(const std::string & path, int32_t version) { auto node_it = container.find(path); if (node_it == container.end()) @@ -975,71 +1081,84 @@ bool KeeperStorage::removeNode(const std::string & path, int32_t version) prev_node.shallowCopy(node_it->value); acl_map.removeUsage(node_it->value.acl_id); - container.updateValue( - parentNodePath(path), - [child_basename = getBaseNodeName(node_it->key)](KeeperStorage::Node & parent) - { - parent.removeChild(child_basename); - chassert(parent.numChildren() == static_cast(parent.getChildren().size())); - } - ); + if constexpr (use_rocksdb) + container.erase(path); + else + { + container.updateValue( + parentNodePath(path), + [child_basename = getBaseNodeName(node_it->key)](KeeperMemNode & parent) + { + parent.removeChild(child_basename); + chassert(parent.numChildren() == static_cast(parent.getChildren().size())); + } + ); - container.erase(path); + container.erase(path); - removeDigest(prev_node, path); + removeDigest(prev_node, path); + } return true; } +template struct KeeperStorageRequestProcessor { Coordination::ZooKeeperRequestPtr zk_request; explicit KeeperStorageRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) : zk_request(zk_request_) { } - virtual Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const = 0; - virtual std::vector - preprocess(KeeperStorage & /*storage*/, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const + + virtual Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const = 0; + + virtual std::vector + preprocess(Storage & /*storage*/, int64_t /*zxid*/, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const { return {}; } // process the request using locally committed data virtual Coordination::ZooKeeperResponsePtr - processLocal(KeeperStorage & /*storage*/, int64_t /*zxid*/) const + processLocal(Storage & /*storage*/, int64_t /*zxid*/) const { throw Exception{DB::ErrorCodes::LOGICAL_ERROR, "Cannot process the request locally"}; } - virtual KeeperStorage::ResponsesForSessions - processWatches(KeeperStorage::Watches & /*watches*/, KeeperStorage::Watches & /*list_watches*/) const + virtual KeeperStorageBase::ResponsesForSessions + processWatches(KeeperStorageBase::Watches & /*watches*/, KeeperStorageBase::Watches & 
/*list_watches*/) const { return {}; } - virtual bool checkAuth(KeeperStorage & /*storage*/, int64_t /*session_id*/, bool /*is_local*/) const { return true; } + + virtual bool checkAuth(Storage & /*storage*/, int64_t /*session_id*/, bool /*is_local*/) const { return true; } virtual ~KeeperStorageRequestProcessor() = default; }; -struct KeeperStorageHeartbeatRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageHeartbeatRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + Coordination::ZooKeeperResponsePtr - process(KeeperStorage & storage, int64_t zxid) const override + process(Storage & storage, int64_t zxid) const override { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); response_ptr->error = storage.commit(zxid); return response_ptr; } }; -struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + Coordination::ZooKeeperResponsePtr - process(KeeperStorage & /* storage */, int64_t /* zxid */) const override + process(Storage & /* storage */, int64_t /* zxid */) const override { - auto response = zk_request->makeResponse(); + auto response = this->zk_request->makeResponse(); dynamic_cast(*response).path - = dynamic_cast(*zk_request).path; + = dynamic_cast(*this->zk_request).path; return response; } }; @@ -1047,7 +1166,8 @@ struct KeeperStorageSyncRequestProcessor final : public KeeperStorageRequestProc namespace { -Coordination::ACLs getNodeACLs(KeeperStorage & storage, StringRef path, bool is_local) +template +Coordination::ACLs getNodeACLs(Storage & storage, StringRef path, bool is_local) { if (is_local) { @@ -1075,7 +1195,8 @@ void handleSystemNodeModification(const KeeperContext & keeper_context, std::str } -bool KeeperStorage::checkACL(StringRef path, int32_t permission, int64_t session_id, bool is_local) +template +bool KeeperStorage::checkACL(StringRef path, int32_t permission, int64_t session_id, bool is_local) { const auto node_acls = getNodeACLs(*this, path, is_local); if (node_acls.empty()) @@ -1102,7 +1223,8 @@ bool KeeperStorage::checkACL(StringRef path, int32_t permission, int64_t session return false; } -void KeeperStorage::unregisterEphemeralPath(int64_t session_id, const std::string & path) +template +void KeeperStorage::unregisterEphemeralPath(int64_t session_id, const std::string & path) { auto ephemerals_it = ephemerals.find(session_id); if (ephemerals_it == ephemerals.end()) @@ -1113,43 +1235,44 @@ void KeeperStorage::unregisterEphemeralPath(int64_t session_id, const std::strin ephemerals.erase(ephemerals_it); } -struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - KeeperStorage::ResponsesForSessions - processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + 
KeeperStorageBase::ResponsesForSessions + processWatches(KeeperStorageBase::Watches & watches, KeeperStorageBase::Watches & list_watches) const override { - return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED); + return processWatchesImpl(this->zk_request->getPath(), watches, list_watches, Coordination::Event::CREATED); } - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - auto path = zk_request->getPath(); + auto path = this->zk_request->getPath(); return storage.checkACL(parentNodePath(path), Coordination::ACL::Create, session_id, is_local); } - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t session_id, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override { ProfileEvents::increment(ProfileEvents::KeeperCreateRequest); - Coordination::ZooKeeperCreateRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperCreateRequest & request = dynamic_cast(*this->zk_request); - std::vector new_deltas; + std::vector new_deltas; auto parent_path = parentNodePath(request.path); auto parent_node = storage.uncommitted_state.getNode(parent_path); if (parent_node == nullptr) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; else if (parent_node->isEphemeral()) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNOCHILDRENFOREPHEMERALS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNOCHILDRENFOREPHEMERALS}}; std::string path_created = request.path; if (request.is_sequential) { if (request.not_exists) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; auto seq_num = parent_node->seqNum(); @@ -1165,30 +1288,30 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr auto error_msg = fmt::format("Trying to create a node inside the internal Keeper path ({}) which is not allowed. 
Path: {}", keeper_system_path, path_created); handleSystemNodeModification(keeper_context, error_msg); - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; } if (storage.uncommitted_state.getNode(path_created)) { - if (zk_request->getOpNum() == Coordination::OpNum::CreateIfNotExists) + if (this->zk_request->getOpNum() == Coordination::OpNum::CreateIfNotExists) return new_deltas; - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNODEEXISTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNODEEXISTS}}; } if (getBaseNodeName(path_created).size == 0) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; Coordination::ACLs node_acls; if (!fixupACL(request.acls, session_id, storage.uncommitted_state, node_acls)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZINVALIDACL}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZINVALIDACL}}; if (request.is_ephemeral) storage.ephemerals[session_id].emplace(path_created); int32_t parent_cversion = request.parent_cversion; - auto parent_update = [parent_cversion, zxid](KeeperStorage::Node & node) + auto parent_update = [parent_cversion, zxid](Storage::Node & node) { /// Increment sequential number even if node is not sequential node.increaseSeqNum(); @@ -1201,7 +1324,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr node.increaseNumChildren(); }; - new_deltas.emplace_back(std::string{parent_path}, zxid, KeeperStorage::UpdateNodeDelta{std::move(parent_update)}); + new_deltas.emplace_back(std::string{parent_path}, zxid, typename Storage::UpdateNodeDelta{std::move(parent_update)}); Coordination::Stat stat; stat.czxid = zxid; @@ -1218,20 +1341,20 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr new_deltas.emplace_back( std::move(path_created), zxid, - KeeperStorage::CreateNodeDelta{stat, std::move(node_acls), request.data}); + typename Storage::CreateNodeDelta{stat, std::move(node_acls), request.data}); digest = storage.calculateNodesDigest(digest, new_deltas); return new_deltas; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperCreateResponse & response = dynamic_cast(*response_ptr); if (storage.uncommitted_state.deltas.begin()->zxid != zxid) { - response.path_created = zk_request->getPath(); + response.path_created = this->zk_request->getPath(); response.error = Coordination::Error::ZOK; return response_ptr; } @@ -1247,7 +1370,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr deltas.begin(), deltas.end(), [zxid](const auto & delta) - { return delta.zxid == zxid && std::holds_alternative(delta.operation); }); + { return delta.zxid == zxid && std::holds_alternative(delta.operation); }); response.path_created = create_delta_it->path; response.error = Coordination::Error::ZOK; @@ -1255,20 +1378,21 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr } }; -struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProcessor +template 
+struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); + return storage.checkACL(this->zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); } - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { ProfileEvents::increment(ProfileEvents::KeeperGetRequest); - Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperGetRequest & request = dynamic_cast(*this->zk_request); if (request.path == Coordination::keeper_api_feature_flags_path || request.path == Coordination::keeper_config_path @@ -1276,17 +1400,17 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce return {}; if (!storage.uncommitted_state.getNode(request.path)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; return {}; } template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid) const + Coordination::ZooKeeperResponsePtr processImpl(Storage & storage, int64_t zxid) const { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperGetResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperGetRequest & request = dynamic_cast(*this->zk_request); if constexpr (!local) { @@ -1326,40 +1450,42 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { return processImpl(storage, zxid); } - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr processLocal(Storage & storage, int64_t zxid) const override { ProfileEvents::increment(ProfileEvents::KeeperGetRequest); return processImpl(storage, zxid); } }; -struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - return storage.checkACL(parentNodePath(zk_request->getPath()), Coordination::ACL::Delete, session_id, is_local); + return storage.checkACL(parentNodePath(this->zk_request->getPath()), Coordination::ACL::Delete, session_id, is_local); } - using 
KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & digest, const KeeperContext & keeper_context) const override + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & digest, const KeeperContext & keeper_context) const override { ProfileEvents::increment(ProfileEvents::KeeperRemoveRequest); - Coordination::ZooKeeperRemoveRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperRemoveRequest & request = dynamic_cast(*this->zk_request); - std::vector new_deltas; + std::vector new_deltas; if (Coordination::matchPath(request.path, keeper_system_path) != Coordination::PathMatchResult::NOT_MATCH) { auto error_msg = fmt::format("Trying to delete an internal Keeper path ({}) which is not allowed", request.path); handleSystemNodeModification(keeper_context, error_msg); - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; } const auto update_parent_pzxid = [&]() @@ -1371,9 +1497,9 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr new_deltas.emplace_back( std::string{parent_path}, zxid, - KeeperStorage::UpdateNodeDelta + typename Storage::UpdateNodeDelta { - [zxid](KeeperStorage::Node & parent) + [zxid](Storage::Node & parent) { parent.pzxid = std::max(parent.pzxid, zxid); } @@ -1387,12 +1513,12 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr { if (request.restored_from_zookeeper_log) update_parent_pzxid(); - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; } else if (request.version != -1 && request.version != node->version) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADVERSION}}; else if (node->numChildren() != 0) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNOTEMPTY}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNOTEMPTY}}; if (request.restored_from_zookeeper_log) update_parent_pzxid(); @@ -1400,13 +1526,13 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr new_deltas.emplace_back( std::string{parentNodePath(request.path)}, zxid, - KeeperStorage::UpdateNodeDelta{[](KeeperStorage::Node & parent) + typename Storage::UpdateNodeDelta{[](typename Storage::Node & parent) { ++parent.cversion; parent.decreaseNumChildren(); }}); - new_deltas.emplace_back(request.path, zxid, KeeperStorage::RemoveNodeDelta{request.version, node->ephemeralOwner()}); + new_deltas.emplace_back(request.path, zxid, typename Storage::RemoveNodeDelta{request.version, node->ephemeralOwner()}); if (node->isEphemeral()) storage.unregisterEphemeralPath(node->ephemeralOwner(), request.path); @@ -1416,44 +1542,45 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr return new_deltas; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = 
this->zk_request->makeResponse(); Coordination::ZooKeeperRemoveResponse & response = dynamic_cast(*response_ptr); response.error = storage.commit(zxid); return response_ptr; } - KeeperStorage::ResponsesForSessions - processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + KeeperStorageBase::ResponsesForSessions + processWatches(KeeperStorageBase::Watches & watches, KeeperStorageBase::Watches & list_watches) const override { - return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::DELETED); + return processWatchesImpl(this->zk_request->getPath(), watches, list_watches, Coordination::Event::DELETED); } }; -struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { ProfileEvents::increment(ProfileEvents::KeeperExistsRequest); - Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*this->zk_request); if (!storage.uncommitted_state.getNode(request.path)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; return {}; } template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid) const + Coordination::ZooKeeperResponsePtr processImpl(Storage & storage, int64_t zxid) const { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperExistsResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*this->zk_request); if constexpr (!local) { @@ -1482,55 +1609,57 @@ struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestPr return response_ptr; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { return processImpl(storage, zxid); } - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr processLocal(Storage & storage, int64_t zxid) const override { ProfileEvents::increment(ProfileEvents::KeeperExistsRequest); return processImpl(storage, zxid); } }; -struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - return storage.checkACL(zk_request->getPath(), 
Coordination::ACL::Write, session_id, is_local); + return storage.checkACL(this->zk_request->getPath(), Coordination::ACL::Write, session_id, is_local); } - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override { ProfileEvents::increment(ProfileEvents::KeeperSetRequest); - Coordination::ZooKeeperSetRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperSetRequest & request = dynamic_cast(*this->zk_request); - std::vector new_deltas; + std::vector new_deltas; if (Coordination::matchPath(request.path, keeper_system_path) != Coordination::PathMatchResult::NOT_MATCH) { auto error_msg = fmt::format("Trying to update an internal Keeper path ({}) which is not allowed", request.path); handleSystemNodeModification(keeper_context, error_msg); - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; } if (!storage.uncommitted_state.getNode(request.path)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; auto node = storage.uncommitted_state.getNode(request.path); if (request.version != -1 && request.version != node->version) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADVERSION}}; new_deltas.emplace_back( request.path, zxid, - KeeperStorage::UpdateNodeDelta{ - [zxid, data = request.data, time](KeeperStorage::Node & value) + typename Storage::UpdateNodeDelta{ + [zxid, data = request.data, time](typename Storage::Node & value) { value.version++; value.mzxid = zxid; @@ -1542,9 +1671,9 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce new_deltas.emplace_back( parentNodePath(request.path).toString(), zxid, - KeeperStorage::UpdateNodeDelta + typename Storage::UpdateNodeDelta { - [](KeeperStorage::Node & parent) + [](Storage::Node & parent) { parent.cversion++; } @@ -1555,13 +1684,13 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce return new_deltas; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { auto & container = storage.container; - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperSetResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperSetRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperSetRequest & request = dynamic_cast(*this->zk_request); if (const auto result = storage.commit(zxid); result != Coordination::Error::ZOK) { @@ -1579,40 +1708,41 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce return response_ptr; } - KeeperStorage::ResponsesForSessions - processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + 
KeeperStorageBase::ResponsesForSessions + processWatches(typename Storage::Watches & watches, typename Storage::Watches & list_watches) const override { - return processWatchesImpl(zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED); + return processWatchesImpl(this->zk_request->getPath(), watches, list_watches, Coordination::Event::CHANGED); } }; -struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); + return storage.checkACL(this->zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); } - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { ProfileEvents::increment(ProfileEvents::KeeperListRequest); - Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperListRequest & request = dynamic_cast(*this->zk_request); if (!storage.uncommitted_state.getNode(request.path)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; return {}; } - template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid) const + Coordination::ZooKeeperResponsePtr processImpl(Storage & storage, int64_t zxid) const { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperListResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperListRequest & request = dynamic_cast(*this->zk_request); if constexpr (!local) { @@ -1639,33 +1769,55 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc if (path_prefix.empty()) throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Path cannot be empty"); - const auto & children = node_it->value.getChildren(); + const auto & get_children = [&]() + { + if constexpr (Storage::use_rocksdb) + return container.getChildren(request.path); + else + return node_it->value.getChildren(); + }; + const auto & children = get_children(); response.names.reserve(children.size()); - const auto add_child = [&](const auto child) + const auto add_child = [&](const auto & child) { using enum Coordination::ListRequestType; auto list_request_type = ALL; if (auto * filtered_list = dynamic_cast(&request)) + { list_request_type = filtered_list->list_request_type; + } if (list_request_type == ALL) return true; - auto child_path = (std::filesystem::path(request.path) / child.toView()).generic_string(); - auto child_it = container.find(child_path); - if (child_it == container.end()) - onStorageInconsistency(); + bool 
is_ephemeral; + if constexpr (!Storage::use_rocksdb) + { + auto child_path = (std::filesystem::path(request.path) / child.toView()).generic_string(); + auto child_it = container.find(child_path); + if (child_it == container.end()) + onStorageInconsistency(); + is_ephemeral = child_it->value.isEphemeral(); + } + else + { + is_ephemeral = child.second.isEphemeral(); + } - const auto is_ephemeral = child_it->value.isEphemeral(); return (is_ephemeral && list_request_type == EPHEMERAL_ONLY) || (!is_ephemeral && list_request_type == PERSISTENT_ONLY); }; - for (const auto child : children) + for (const auto & child : children) { if (add_child(child)) - response.names.push_back(child.toString()); + { + if constexpr (Storage::use_rocksdb) + response.names.push_back(child.first); + else + response.names.push_back(child.toString()); + } } node_it->value.setResponseStat(response.stat); @@ -1675,63 +1827,64 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc return response_ptr; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { return processImpl(storage, zxid); } - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr processLocal(Storage & storage, int64_t zxid) const override { ProfileEvents::increment(ProfileEvents::KeeperListRequest); return processImpl(storage, zxid); } }; -struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestProcessor { explicit KeeperStorageCheckRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) - : KeeperStorageRequestProcessor(zk_request_) + : KeeperStorageRequestProcessor(zk_request_) { - check_not_exists = zk_request->getOpNum() == Coordination::OpNum::CheckNotExists; + check_not_exists = this->zk_request->getOpNum() == Coordination::OpNum::CheckNotExists; } - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - auto path = zk_request->getPath(); + auto path = this->zk_request->getPath(); return storage.checkACL(check_not_exists ? 
parentNodePath(path) : path, Coordination::ACL::Read, session_id, is_local); } - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { ProfileEvents::increment(ProfileEvents::KeeperCheckRequest); - Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*this->zk_request); auto node = storage.uncommitted_state.getNode(request.path); if (check_not_exists) { if (node && (request.version == -1 || request.version == node->version)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNODEEXISTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNODEEXISTS}}; } else { if (!node) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; if (request.version != -1 && request.version != node->version) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADVERSION}}; } return {}; } template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid) const + Coordination::ZooKeeperResponsePtr processImpl(Storage & storage, int64_t zxid) const { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperCheckResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*this->zk_request); if constexpr (!local) { @@ -1773,12 +1926,12 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro return response_ptr; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { return processImpl(storage, zxid); } - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr processLocal(Storage & storage, int64_t zxid) const override { ProfileEvents::increment(ProfileEvents::KeeperCheckRequest); return processImpl(storage, zxid); @@ -1789,55 +1942,56 @@ private: }; -struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Admin, session_id, is_local); + return storage.checkACL(this->zk_request->getPath(), Coordination::ACL::Admin, session_id, is_local); } - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/, uint64_t & digest, const KeeperContext & 
keeper_context) const override + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/, uint64_t & digest, const KeeperContext & keeper_context) const override { - Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*this->zk_request); if (Coordination::matchPath(request.path, keeper_system_path) != Coordination::PathMatchResult::NOT_MATCH) { auto error_msg = fmt::format("Trying to update an internal Keeper path ({}) which is not allowed", request.path); handleSystemNodeModification(keeper_context, error_msg); - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; } auto & uncommitted_state = storage.uncommitted_state; if (!uncommitted_state.getNode(request.path)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; auto node = uncommitted_state.getNode(request.path); if (request.version != -1 && request.version != node->aversion) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZBADVERSION}}; Coordination::ACLs node_acls; if (!fixupACL(request.acls, session_id, uncommitted_state, node_acls)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZINVALIDACL}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZINVALIDACL}}; - std::vector new_deltas + std::vector new_deltas { { request.path, zxid, - KeeperStorage::SetACLDelta{std::move(node_acls), request.version} + typename Storage::SetACLDelta{std::move(node_acls), request.version} }, { request.path, zxid, - KeeperStorage::UpdateNodeDelta + typename Storage::UpdateNodeDelta { - [](KeeperStorage::Node & n) { ++n.aversion; } + [](typename Storage::Node & n) { ++n.aversion; } } } }; @@ -1847,11 +2001,11 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr return new_deltas; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperSetACLResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperSetACLRequest & request = dynamic_cast(*this->zk_request); if (const auto result = storage.commit(zxid); result != Coordination::Error::ZOK) { @@ -1869,32 +2023,33 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr } }; -struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Admin | Coordination::ACL::Read, session_id, is_local); + return storage.checkACL(this->zk_request->getPath(), Coordination::ACL::Admin | Coordination::ACL::Read, session_id, is_local); } - using 
KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { - Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*this->zk_request); if (!storage.uncommitted_state.getNode(request.path)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZNONODE}}; return {}; } template - Coordination::ZooKeeperResponsePtr processImpl(KeeperStorage & storage, int64_t zxid) const + Coordination::ZooKeeperResponsePtr processImpl(Storage & storage, int64_t zxid) const { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperGetACLResponse & response = dynamic_cast(*response_ptr); - Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperGetACLRequest & request = dynamic_cast(*this->zk_request); if constexpr (!local) { @@ -1923,23 +2078,24 @@ struct KeeperStorageGetACLRequestProcessor final : public KeeperStorageRequestPr return response_ptr; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { return processImpl(storage, zxid); } - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr processLocal(Storage & storage, int64_t zxid) const override { return processImpl(storage, zxid); } }; -struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestProcessor { using OperationType = Coordination::ZooKeeperMultiRequest::OperationType; std::optional operation_type; - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + bool checkAuth(Storage & storage, int64_t session_id, bool is_local) const override { for (const auto & concrete_request : concrete_requests) if (!concrete_request->checkAuth(storage, session_id, is_local)) @@ -1947,11 +2103,11 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro return true; } - std::vector concrete_requests; + std::vector>> concrete_requests; explicit KeeperStorageMultiRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) - : KeeperStorageRequestProcessor(zk_request_) + : KeeperStorageRequestProcessor(zk_request_) { - Coordination::ZooKeeperMultiRequest & request = dynamic_cast(*zk_request); + Coordination::ZooKeeperMultiRequest & request = dynamic_cast(*this->zk_request); concrete_requests.reserve(request.requests.size()); const auto check_operation_type = [&](OperationType type) @@ -1969,34 +2125,34 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro case Coordination::OpNum::Create: case Coordination::OpNum::CreateIfNotExists: 
check_operation_type(OperationType::Write); - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared>(sub_zk_request)); break; case Coordination::OpNum::Remove: check_operation_type(OperationType::Write); - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared>(sub_zk_request)); break; case Coordination::OpNum::Set: check_operation_type(OperationType::Write); - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared>(sub_zk_request)); break; case Coordination::OpNum::Check: case Coordination::OpNum::CheckNotExists: check_operation_type(OperationType::Write); - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared>(sub_zk_request)); break; case Coordination::OpNum::Get: check_operation_type(OperationType::Read); - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared>(sub_zk_request)); break; case Coordination::OpNum::Exists: check_operation_type(OperationType::Read); - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared>(sub_zk_request)); break; case Coordination::OpNum::List: case Coordination::OpNum::FilteredList: case Coordination::OpNum::SimpleList: check_operation_type(OperationType::Read); - concrete_requests.push_back(std::make_shared(sub_zk_request)); + concrete_requests.push_back(std::make_shared>(sub_zk_request)); break; default: throw DB::Exception( @@ -2009,8 +2165,8 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro chassert(request.requests.empty() || operation_type.has_value()); } - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t session_id, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override { ProfileEvents::increment(ProfileEvents::KeeperMultiRequest); std::vector response_errors; @@ -2022,7 +2178,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro if (!new_deltas.empty()) { - if (auto * error = std::get_if(&new_deltas.back().operation); + if (auto * error = std::get_if(&new_deltas.back().operation); error && *operation_type == OperationType::Write) { storage.uncommitted_state.rollback(zxid); @@ -2033,10 +2189,10 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro response_errors.push_back(Coordination::Error::ZRUNTIMEINCONSISTENCY); } - return {KeeperStorage::Delta{zxid, KeeperStorage::FailedMultiDelta{std::move(response_errors)}}}; + return {typename Storage::Delta{zxid, typename Storage::FailedMultiDelta{std::move(response_errors)}}}; } } - new_deltas.emplace_back(zxid, KeeperStorage::SubDeltaEnd{}); + new_deltas.emplace_back(zxid, typename Storage::SubDeltaEnd{}); response_errors.push_back(Coordination::Error::ZOK); // manually add deltas so that the result of previous request in the transaction is used in the next request @@ -2048,15 +2204,15 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro return {}; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const 
override { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); auto & deltas = storage.uncommitted_state.deltas; // the deltas will have at least SubDeltaEnd or FailedMultiDelta chassert(!deltas.empty()); - if (auto * failed_multi = std::get_if(&deltas.front().operation)) + if (auto * failed_multi = std::get_if(&deltas.front().operation)) { for (size_t i = 0; i < concrete_requests.size(); ++i) { @@ -2078,10 +2234,10 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro return response_ptr; } - Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr processLocal(Storage & storage, int64_t zxid) const override { ProfileEvents::increment(ProfileEvents::KeeperMultiReadRequest); - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); for (size_t i = 0; i < concrete_requests.size(); ++i) @@ -2093,10 +2249,10 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro return response_ptr; } - KeeperStorage::ResponsesForSessions - processWatches(KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches) const override + KeeperStorageBase::ResponsesForSessions + processWatches(typename Storage::Watches & watches, typename Storage::Watches & list_watches) const override { - KeeperStorage::ResponsesForSessions result; + typename Storage::ResponsesForSessions result; for (const auto & generic_request : concrete_requests) { auto responses = generic_request->processWatches(watches, list_watches); @@ -2106,47 +2262,50 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro } }; -struct KeeperStorageCloseRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageCloseRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - Coordination::ZooKeeperResponsePtr process(KeeperStorage &, int64_t) const override + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + Coordination::ZooKeeperResponsePtr process(Storage &, int64_t) const override { throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Called process on close request"); } }; -struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProcessor +template +struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProcessor { - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; - std::vector - preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override + using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; + + std::vector + preprocess(Storage & storage, int64_t zxid, int64_t session_id, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { - Coordination::ZooKeeperAuthRequest & auth_request = dynamic_cast(*zk_request); - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperAuthRequest & auth_request = 
dynamic_cast(*this->zk_request); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); if (auth_request.scheme != "digest" || std::count(auth_request.data.begin(), auth_request.data.end(), ':') != 1) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZAUTHFAILED}}; + return {typename Storage::Delta{zxid, Coordination::Error::ZAUTHFAILED}}; - std::vector new_deltas; - auto auth_digest = KeeperStorage::generateDigest(auth_request.data); + std::vector new_deltas; + auto auth_digest = Storage::generateDigest(auth_request.data); if (auth_digest == storage.superdigest) { - KeeperStorage::AuthID auth{"super", ""}; - new_deltas.emplace_back(zxid, KeeperStorage::AddAuthDelta{session_id, std::move(auth)}); + typename Storage::AuthID auth{"super", ""}; + new_deltas.emplace_back(zxid, typename Storage::AddAuthDelta{session_id, std::move(auth)}); } else { - KeeperStorage::AuthID new_auth{auth_request.scheme, auth_digest}; + typename Storage::AuthID new_auth{auth_request.scheme, auth_digest}; if (!storage.uncommitted_state.hasACL(session_id, false, [&](const auto & auth_id) { return new_auth == auth_id; })) - new_deltas.emplace_back(zxid, KeeperStorage::AddAuthDelta{session_id, std::move(new_auth)}); + new_deltas.emplace_back(zxid, typename Storage::AddAuthDelta{session_id, std::move(new_auth)}); } return new_deltas; } - Coordination::ZooKeeperResponsePtr process(KeeperStorage & storage, int64_t zxid) const override + Coordination::ZooKeeperResponsePtr process(Storage & storage, int64_t zxid) const override { - Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); Coordination::ZooKeeperAuthResponse & auth_response = dynamic_cast(*response_ptr); if (const auto result = storage.commit(zxid); result != Coordination::Error::ZOK) @@ -2156,7 +2315,8 @@ struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProc } }; -void KeeperStorage::finalize() +template +void KeeperStorage::finalize() { if (finalized) throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage already finalized"); @@ -2171,25 +2331,26 @@ void KeeperStorage::finalize() session_expiry_queue.clear(); } -bool KeeperStorage::isFinalized() const +template +bool KeeperStorage::isFinalized() const { return finalized; } - +template class KeeperStorageRequestProcessorsFactory final : private boost::noncopyable { public: - using Creator = std::function; + using Creator = std::function>(const Coordination::ZooKeeperRequestPtr &)>; using OpNumToRequest = std::unordered_map; - static KeeperStorageRequestProcessorsFactory & instance() + static KeeperStorageRequestProcessorsFactory & instance() { - static KeeperStorageRequestProcessorsFactory factory; + static KeeperStorageRequestProcessorsFactory factory; return factory; } - KeeperStorageRequestProcessorPtr get(const Coordination::ZooKeeperRequestPtr & zk_request) const + std::shared_ptr> get(const Coordination::ZooKeeperRequestPtr & zk_request) const { auto request_it = op_num_to_request.find(zk_request->getOpNum()); if (request_it == op_num_to_request.end()) @@ -2209,39 +2370,41 @@ private: KeeperStorageRequestProcessorsFactory(); }; -template -void registerKeeperRequestProcessor(KeeperStorageRequestProcessorsFactory & factory) +template +void registerKeeperRequestProcessor(Factory & factory) { factory.registerRequest( num, [](const Coordination::ZooKeeperRequestPtr & zk_request) { return std::make_shared(zk_request); }); } 
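// ---------------------------------------------------------------------------
// Illustrative aside (not part of the patch): the templated factory above keeps an
// OpNum -> creator map, and registerKeeperRequestProcessor binds each opnum to a
// concrete processor type. The reduction below sketches that registration pattern
// under assumed, made-up names (Request, OpNum, ProcessorFactory, registerProcessor);
// the real factory is keyed by Coordination::OpNum and parameterised by the Keeper
// storage type.

#include <functional>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <unordered_map>

enum class OpNum { Create, Remove };

struct Request { OpNum op; };
using RequestPtr = std::shared_ptr<Request>;

struct InMemoryStorage {};

template <typename Storage>
struct RequestProcessor
{
    explicit RequestProcessor(RequestPtr request_) : request(std::move(request_)) {}
    virtual ~RequestProcessor() = default;
    virtual void process(Storage &) const = 0;
    RequestPtr request;
};

template <typename Storage>
struct CreateProcessor final : RequestProcessor<Storage>
{
    using RequestProcessor<Storage>::RequestProcessor;
    void process(Storage &) const override { std::cout << "create\n"; }
};

// One factory singleton per storage type, mirroring the shape of the templated
// KeeperStorageRequestProcessorsFactory above.
template <typename Storage>
class ProcessorFactory
{
public:
    using Creator = std::function<std::shared_ptr<RequestProcessor<Storage>>(const RequestPtr &)>;

    static ProcessorFactory & instance()
    {
        static ProcessorFactory factory;
        return factory;
    }

    void registerRequest(OpNum op, Creator creator) { creators[op] = std::move(creator); }

    std::shared_ptr<RequestProcessor<Storage>> get(const RequestPtr & request) const
    {
        auto it = creators.find(request->op);
        if (it == creators.end())
            throw std::runtime_error("unknown operation");
        return it->second(request);
    }

private:
    std::unordered_map<OpNum, Creator> creators;
};

// Counterpart of registerKeeperRequestProcessor: one line per supported opnum.
template <OpNum op, typename ProcessorT, typename Factory>
void registerProcessor(Factory & factory)
{
    factory.registerRequest(op, [](const RequestPtr & request) { return std::make_shared<ProcessorT>(request); });
}

int main()
{
    auto & factory = ProcessorFactory<InMemoryStorage>::instance();
    registerProcessor<OpNum::Create, CreateProcessor<InMemoryStorage>>(factory);

    InMemoryStorage storage;
    factory.get(std::make_shared<Request>(Request{OpNum::Create}))->process(storage);
}
// (end of illustrative aside; the patch continues below)
// ---------------------------------------------------------------------------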
-KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() +template +KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() { - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); - registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); + registerKeeperRequestProcessor>(*this); } -UInt64 KeeperStorage::calculateNodesDigest(UInt64 current_digest, const std::vector & new_deltas) const +template +UInt64 KeeperStorage::calculateNodesDigest(UInt64 current_digest, const std::vector & new_deltas) const { if (!keeper_context->digestEnabled()) return current_digest; @@ -2276,7 +2439,7 @@ UInt64 KeeperStorage::calculateNodesDigest(UInt64 current_digest, const std::vec auto updated_node_it = updated_nodes.find(delta.path); if (updated_node_it == updated_nodes.end()) { - node = std::make_shared(); + node = std::make_shared(); node->shallowCopy(*uncommitted_state.getNode(delta.path)); current_digest -= node->getDigest(delta.path); updated_nodes.emplace(delta.path, node); @@ -2302,7 +2465,8 @@ UInt64 KeeperStorage::calculateNodesDigest(UInt64 current_digest, const std::vec return current_digest; } -void KeeperStorage::preprocessRequest( +template +void KeeperStorage::preprocessRequest( const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, int64_t time, @@ -2375,7 +2539,7 @@ void KeeperStorage::preprocessRequest( uncommitted_state.addDeltas(std::move(new_deltas)); }); - KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); + auto request_processor = KeeperStorageRequestProcessorsFactory>::instance().get(zk_request); if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special { @@ -2418,7 +2582,8 @@ void KeeperStorage::preprocessRequest( new_deltas = request_processor->preprocess(*this, transaction.zxid, session_id, time, new_digest, *keeper_context); } -KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( +template +KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( const Coordination::ZooKeeperRequestPtr & zk_request, 
int64_t session_id, std::optional new_last_zxid, @@ -2459,7 +2624,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( uncommitted_transactions.pop_front(); } - KeeperStorage::ResponsesForSessions results; + ResponsesForSessions results; /// ZooKeeper update sessions expirity for each request, not only for heartbeats session_expiry_queue.addNewSessionOrUpdate(session_id, session_and_timeout[session_id]); @@ -2495,7 +2660,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( } else if (zk_request->getOpNum() == Coordination::OpNum::Heartbeat) /// Heartbeat request is also special { - KeeperStorageRequestProcessorPtr storage_request = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); + auto storage_request = KeeperStorageRequestProcessorsFactory>::instance().get(zk_request); auto response = storage_request->process(*this, zxid); response->xid = zk_request->xid; response->zxid = getZXID(); @@ -2504,7 +2669,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( } else /// normal requests proccession { - KeeperStorageRequestProcessorPtr request_processor = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); + auto request_processor = KeeperStorageRequestProcessorsFactory>::instance().get(zk_request); Coordination::ZooKeeperResponsePtr response; if (is_local) @@ -2567,7 +2732,8 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest( return results; } -void KeeperStorage::rollbackRequest(int64_t rollback_zxid, bool allow_missing) +template +void KeeperStorage::rollbackRequest(int64_t rollback_zxid, bool allow_missing) { if (allow_missing && (uncommitted_transactions.empty() || uncommitted_transactions.back().zxid < rollback_zxid)) return; @@ -2593,7 +2759,8 @@ void KeeperStorage::rollbackRequest(int64_t rollback_zxid, bool allow_missing) } } -KeeperStorage::Digest KeeperStorage::getNodesDigest(bool committed) const +template +KeeperStorageBase::Digest KeeperStorage::getNodesDigest(bool committed) const { if (!keeper_context->digestEnabled()) return {.version = DigestVersion::NO_DIGEST}; @@ -2604,13 +2771,15 @@ KeeperStorage::Digest KeeperStorage::getNodesDigest(bool committed) const return uncommitted_transactions.back().nodes_digest; } -void KeeperStorage::removeDigest(const Node & node, const std::string_view path) +template +void KeeperStorage::removeDigest(const Node & node, const std::string_view path) { if (keeper_context->digestEnabled()) nodes_digest -= node.getDigest(path); } -void KeeperStorage::addDigest(const Node & node, const std::string_view path) +template +void KeeperStorage::addDigest(const Node & node, const std::string_view path) { if (keeper_context->digestEnabled()) { @@ -2619,7 +2788,8 @@ void KeeperStorage::addDigest(const Node & node, const std::string_view path) } } -void KeeperStorage::clearDeadWatches(int64_t session_id) +template +void KeeperStorage::clearDeadWatches(int64_t session_id) { /// Clear all watches for this session auto watches_it = sessions_and_watchers.find(session_id); @@ -2652,7 +2822,8 @@ void KeeperStorage::clearDeadWatches(int64_t session_id) } } -void KeeperStorage::dumpWatches(WriteBufferFromOwnString & buf) const +template +void KeeperStorage::dumpWatches(WriteBufferFromOwnString & buf) const { for (const auto & [session_id, watches_paths] : sessions_and_watchers) { @@ -2662,7 +2833,8 @@ void KeeperStorage::dumpWatches(WriteBufferFromOwnString & buf) const } } -void KeeperStorage::dumpWatchesByPath(WriteBufferFromOwnString & buf) const +template 
+void KeeperStorage::dumpWatchesByPath(WriteBufferFromOwnString & buf) const { auto write_int_container = [&buf](const auto & session_ids) { @@ -2685,7 +2857,8 @@ void KeeperStorage::dumpWatchesByPath(WriteBufferFromOwnString & buf) const } } -void KeeperStorage::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const +template +void KeeperStorage::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const { auto write_str_set = [&buf](const std::unordered_set & ephemeral_paths) { @@ -2710,7 +2883,8 @@ void KeeperStorage::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) co } } -uint64_t KeeperStorage::getTotalWatchesCount() const +template +uint64_t KeeperStorage::getTotalWatchesCount() const { uint64_t ret = 0; for (const auto & [session, paths] : sessions_and_watchers) @@ -2719,12 +2893,14 @@ uint64_t KeeperStorage::getTotalWatchesCount() const return ret; } -uint64_t KeeperStorage::getSessionsWithWatchesCount() const +template +uint64_t KeeperStorage::getSessionsWithWatchesCount() const { return sessions_and_watchers.size(); } -uint64_t KeeperStorage::getTotalEphemeralNodesCount() const +template +uint64_t KeeperStorage::getTotalEphemeralNodesCount() const { uint64_t ret = 0; for (const auto & [session_id, nodes] : ephemerals) @@ -2733,12 +2909,13 @@ uint64_t KeeperStorage::getTotalEphemeralNodesCount() const return ret; } -void KeeperStorage::recalculateStats() +template +void KeeperStorage::recalculateStats() { container.recalculateDataSize(); } -bool KeeperStorage::checkDigest(const Digest & first, const Digest & second) +bool KeeperStorageBase::checkDigest(const Digest & first, const Digest & second) { if (first.version != second.version) return true; @@ -2749,13 +2926,18 @@ bool KeeperStorage::checkDigest(const Digest & first, const Digest & second) return first.value == second.value; } -String KeeperStorage::generateDigest(const String & userdata) +template +String KeeperStorage::generateDigest(const String & userdata) { std::vector user_password; boost::split(user_password, userdata, [](char character) { return character == ':'; }); return user_password[0] + ":" + base64Encode(getSHA1(userdata)); } +template class KeeperStorage>; +#if USE_ROCKSDB +template class KeeperStorage>; +#endif } diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index f7812ad8877..4a9286d4835 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -8,188 +8,384 @@ #include +#include "config.h" +#if USE_ROCKSDB +#include +#endif + namespace DB { class KeeperContext; using KeeperContextPtr = std::shared_ptr; -struct KeeperStorageRequestProcessor; -using KeeperStorageRequestProcessorPtr = std::shared_ptr; using ResponseCallback = std::function; using ChildrenSet = absl::flat_hash_set; using SessionAndTimeout = std::unordered_map; -struct KeeperStorageSnapshot; - -/// Keeper state machine almost equal to the ZooKeeper's state machine. -/// Implements all logic of operations, data changes, sessions allocation. -/// In-memory and not thread safe. -class KeeperStorage +/// KeeperRocksNodeInfo is used in RocksDB keeper. +/// It is serialized directly as POD to RocksDB. 
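// ---------------------------------------------------------------------------
// Illustrative aside (not part of the patch): "serialized directly as POD" above
// suggests the fixed-size node metadata is copied bytewise into the RocksDB value,
// followed by the node's data payload. The round trip below is only a guess at that
// layout, using a made-up NodeMeta header; the actual encoding is defined by
// KeeperRocksNode::getEncodedString() / decodeFromString() and may differ.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>
#include <string_view>
#include <type_traits>

struct NodeMeta                 // stand-in for the trivially copyable node header
{
    int64_t czxid = 0;
    int64_t mzxid = 0;
    int64_t mtime = 0;
    int32_t version = 0;
};
static_assert(std::is_trivially_copyable_v<NodeMeta>);

// value = raw bytes of the POD header + the data payload
std::string encodeValue(const NodeMeta & meta, std::string_view data)
{
    std::string value(sizeof(NodeMeta) + data.size(), '\0');
    std::memcpy(value.data(), &meta, sizeof(NodeMeta));
    std::memcpy(value.data() + sizeof(NodeMeta), data.data(), data.size());
    return value;
}

void decodeValue(std::string_view value, NodeMeta & meta, std::string & data)
{
    assert(value.size() >= sizeof(NodeMeta));
    std::memcpy(&meta, value.data(), sizeof(NodeMeta));
    data.assign(value.substr(sizeof(NodeMeta)));
}

int main()
{
    NodeMeta meta;
    meta.mzxid = 42;

    std::string value = encodeValue(meta, "payload");

    NodeMeta decoded;
    std::string data;
    decodeValue(value, decoded, data);
    assert(decoded.mzxid == 42 && data == "payload");
}
// (end of illustrative aside; the patch continues below)
// ---------------------------------------------------------------------------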
+struct KeeperRocksNodeInfo { -public: - /// Node should have as minimal size as possible to reduce memory footprint - /// of stored nodes - /// New fields should be added to the struct only if it's really necessary - struct Node + int64_t czxid{0}; + int64_t mzxid{0}; + int64_t pzxid{0}; + uint64_t acl_id = 0; /// 0 -- no ACL by default + + int64_t mtime{0}; + + int32_t version{0}; + int32_t cversion{0}; + int32_t aversion{0}; + + int32_t seq_num = 0; + mutable UInt64 digest = 0; /// we cached digest for this node. + + /// as ctime can't be negative because it stores the timestamp when the + /// node was created, we can use the MSB for a bool + struct { - int64_t czxid{0}; - int64_t mzxid{0}; - int64_t pzxid{0}; - uint64_t acl_id = 0; /// 0 -- no ACL by default + bool is_ephemeral : 1; + int64_t ctime : 63; + } is_ephemeral_and_ctime{false, 0}; - int64_t mtime{0}; - - std::unique_ptr data{nullptr}; - uint32_t data_size{0}; - - int32_t version{0}; - int32_t cversion{0}; - int32_t aversion{0}; - - mutable uint64_t cached_digest = 0; - - Node() = default; - - Node & operator=(const Node & other); - Node(const Node & other); - - Node & operator=(Node && other) noexcept; - Node(Node && other) noexcept; - - bool empty() const; - - bool isEphemeral() const - { - return is_ephemeral_and_ctime.is_ephemeral; - } - - int64_t ephemeralOwner() const - { - if (isEphemeral()) - return ephemeral_or_children_data.ephemeral_owner; - - return 0; - } - - void setEphemeralOwner(int64_t ephemeral_owner) - { - is_ephemeral_and_ctime.is_ephemeral = ephemeral_owner != 0; - ephemeral_or_children_data.ephemeral_owner = ephemeral_owner; - } - - int32_t numChildren() const - { - if (isEphemeral()) - return 0; - - return ephemeral_or_children_data.children_info.num_children; - } - - void setNumChildren(int32_t num_children) - { - ephemeral_or_children_data.children_info.num_children = num_children; - } - - void increaseNumChildren() - { - chassert(!isEphemeral()); - ++ephemeral_or_children_data.children_info.num_children; - } - - void decreaseNumChildren() - { - chassert(!isEphemeral()); - --ephemeral_or_children_data.children_info.num_children; - } - - int32_t seqNum() const - { - if (isEphemeral()) - return 0; - - return ephemeral_or_children_data.children_info.seq_num; - } - - void setSeqNum(int32_t seq_num) - { - ephemeral_or_children_data.children_info.seq_num = seq_num; - } - - void increaseSeqNum() - { - chassert(!isEphemeral()); - ++ephemeral_or_children_data.children_info.seq_num; - } - - int64_t ctime() const - { - return is_ephemeral_and_ctime.ctime; - } - - void setCtime(uint64_t ctime) - { - is_ephemeral_and_ctime.ctime = ctime; - } - - void copyStats(const Coordination::Stat & stat); - - void setResponseStat(Coordination::Stat & response_stat) const; - - /// Object memory size - uint64_t sizeInBytes() const; - - void setData(const String & new_data); - - std::string_view getData() const noexcept { return {data.get(), data_size}; } - - void addChild(StringRef child_path); - - void removeChild(StringRef child_path); - - const auto & getChildren() const noexcept { return children; } - auto & getChildren() { return children; } - - // Invalidate the calculated digest so it's recalculated again on the next - // getDigest call - void invalidateDigestCache() const; - - // get the calculated digest of the node - UInt64 getDigest(std::string_view path) const; - - // copy only necessary information for preprocessing and digest calculation - // (e.g. 
we don't need to copy list of children) - void shallowCopy(const Node & other); - private: - /// as ctime can't be negative because it stores the timestamp when the - /// node was created, we can use the MSB for a bool + /// ephemeral notes cannot have children so a node can set either + /// ephemeral_owner OR seq_num + num_children + union + { + int64_t ephemeral_owner; struct { - bool is_ephemeral : 1; - int64_t ctime : 63; - } is_ephemeral_and_ctime{false, 0}; + int32_t seq_num; + int32_t num_children; + } children_info; + } ephemeral_or_children_data{0}; - /// ephemeral notes cannot have children so a node can set either - /// ephemeral_owner OR seq_num + num_children - union - { - int64_t ephemeral_owner; - struct - { - int32_t seq_num; - int32_t num_children; - } children_info; - } ephemeral_or_children_data{0}; + bool isEphemeral() const + { + return is_ephemeral_and_ctime.is_ephemeral; + } - ChildrenSet children{}; - }; + int64_t ephemeralOwner() const + { + if (isEphemeral()) + return ephemeral_or_children_data.ephemeral_owner; -#if !defined(ADDRESS_SANITIZER) && !defined(MEMORY_SANITIZER) - static_assert( - sizeof(ListNode) <= 144, - "std::list node containing ListNode is > 160 bytes (sizeof(ListNode) + 16 bytes for pointers) which will increase " - "memory consumption"); + return 0; + } + + void setEphemeralOwner(int64_t ephemeral_owner) + { + is_ephemeral_and_ctime.is_ephemeral = ephemeral_owner != 0; + ephemeral_or_children_data.ephemeral_owner = ephemeral_owner; + } + + int32_t numChildren() const + { + if (isEphemeral()) + return 0; + + return ephemeral_or_children_data.children_info.num_children; + } + + void setNumChildren(int32_t num_children) + { + ephemeral_or_children_data.children_info.num_children = num_children; + } + + /// dummy interface for test + void addChild(StringRef) {} + auto getChildren() const + { + return std::vector(numChildren()); + } + + void increaseNumChildren() + { + chassert(!isEphemeral()); + ++ephemeral_or_children_data.children_info.num_children; + } + + void decreaseNumChildren() + { + chassert(!isEphemeral()); + --ephemeral_or_children_data.children_info.num_children; + } + + int32_t seqNum() const + { + if (isEphemeral()) + return 0; + + return ephemeral_or_children_data.children_info.seq_num; + } + + void setSeqNum(int32_t seq_num_) + { + ephemeral_or_children_data.children_info.seq_num = seq_num_; + } + + void increaseSeqNum() + { + chassert(!isEphemeral()); + ++ephemeral_or_children_data.children_info.seq_num; + } + + int64_t ctime() const + { + return is_ephemeral_and_ctime.ctime; + } + + void setCtime(uint64_t ctime) + { + is_ephemeral_and_ctime.ctime = ctime; + } + + void copyStats(const Coordination::Stat & stat); +}; + +/// KeeperRocksNode is the memory structure used by RocksDB +struct KeeperRocksNode : public KeeperRocksNodeInfo +{ +#if USE_ROCKSDB + friend struct RocksDBContainer; #endif + using Meta = KeeperRocksNodeInfo; + + uint64_t size_bytes = 0; // only for compatible, should be deprecated + + uint64_t sizeInBytes() const { return data_size + sizeof(KeeperRocksNodeInfo); } + void setData(String new_data) + { + data_size = static_cast(new_data.size()); + if (data_size != 0) + { + data = std::unique_ptr(new char[new_data.size()]); + memcpy(data.get(), new_data.data(), data_size); + } + } + + void shallowCopy(const KeeperRocksNode & other) + { + czxid = other.czxid; + mzxid = other.mzxid; + pzxid = other.pzxid; + acl_id = other.acl_id; /// 0 -- no ACL by default + + mtime = other.mtime; + + is_ephemeral_and_ctime = 
other.is_ephemeral_and_ctime; + + ephemeral_or_children_data = other.ephemeral_or_children_data; + + data_size = other.data_size; + if (data_size != 0) + { + data = std::unique_ptr(new char[data_size]); + memcpy(data.get(), other.data.get(), data_size); + } + + version = other.version; + cversion = other.cversion; + aversion = other.aversion; + + /// cached_digest = other.cached_digest; + } + void invalidateDigestCache() const; + UInt64 getDigest(std::string_view path) const; + String getEncodedString(); + void decodeFromString(const String & buffer_str); + void recalculateSize() {} + std::string_view getData() const noexcept { return {data.get(), data_size}; } + + void setResponseStat(Coordination::Stat & response_stat) const + { + response_stat.czxid = czxid; + response_stat.mzxid = mzxid; + response_stat.ctime = ctime(); + response_stat.mtime = mtime; + response_stat.version = version; + response_stat.cversion = cversion; + response_stat.aversion = aversion; + response_stat.ephemeralOwner = ephemeralOwner(); + response_stat.dataLength = static_cast(data_size); + response_stat.numChildren = numChildren(); + response_stat.pzxid = pzxid; + } + + void reset() + { + serialized = false; + } + bool empty() const + { + return data_size == 0 && mzxid == 0; + } + std::unique_ptr data{nullptr}; + uint32_t data_size{0}; +private: + bool serialized = false; +}; + +/// KeeperMemNode should have as minimal size as possible to reduce memory footprint +/// of stored nodes +/// New fields should be added to the struct only if it's really necessary +struct KeeperMemNode +{ + int64_t czxid{0}; + int64_t mzxid{0}; + int64_t pzxid{0}; + uint64_t acl_id = 0; /// 0 -- no ACL by default + + int64_t mtime{0}; + + std::unique_ptr data{nullptr}; + uint32_t data_size{0}; + + int32_t version{0}; + int32_t cversion{0}; + int32_t aversion{0}; + + mutable uint64_t cached_digest = 0; + + KeeperMemNode() = default; + + KeeperMemNode & operator=(const KeeperMemNode & other); + KeeperMemNode(const KeeperMemNode & other); + + KeeperMemNode & operator=(KeeperMemNode && other) noexcept; + KeeperMemNode(KeeperMemNode && other) noexcept; + + bool empty() const; + + bool isEphemeral() const + { + return is_ephemeral_and_ctime.is_ephemeral; + } + + int64_t ephemeralOwner() const + { + if (isEphemeral()) + return ephemeral_or_children_data.ephemeral_owner; + + return 0; + } + + void setEphemeralOwner(int64_t ephemeral_owner) + { + is_ephemeral_and_ctime.is_ephemeral = ephemeral_owner != 0; + ephemeral_or_children_data.ephemeral_owner = ephemeral_owner; + } + + int32_t numChildren() const + { + if (isEphemeral()) + return 0; + + return ephemeral_or_children_data.children_info.num_children; + } + + void setNumChildren(int32_t num_children) + { + ephemeral_or_children_data.children_info.num_children = num_children; + } + + void increaseNumChildren() + { + chassert(!isEphemeral()); + ++ephemeral_or_children_data.children_info.num_children; + } + + void decreaseNumChildren() + { + chassert(!isEphemeral()); + --ephemeral_or_children_data.children_info.num_children; + } + + int32_t seqNum() const + { + if (isEphemeral()) + return 0; + + return ephemeral_or_children_data.children_info.seq_num; + } + + void setSeqNum(int32_t seq_num) + { + ephemeral_or_children_data.children_info.seq_num = seq_num; + } + + void increaseSeqNum() + { + chassert(!isEphemeral()); + ++ephemeral_or_children_data.children_info.seq_num; + } + + int64_t ctime() const + { + return is_ephemeral_and_ctime.ctime; + } + + void setCtime(uint64_t ctime) + { + 
is_ephemeral_and_ctime.ctime = ctime; + } + + void copyStats(const Coordination::Stat & stat); + + void setResponseStat(Coordination::Stat & response_stat) const; + + /// Object memory size + uint64_t sizeInBytes() const; + + void setData(const String & new_data); + + std::string_view getData() const noexcept { return {data.get(), data_size}; } + + void addChild(StringRef child_path); + + void removeChild(StringRef child_path); + + const auto & getChildren() const noexcept { return children; } + auto & getChildren() { return children; } + + // Invalidate the calculated digest so it's recalculated again on the next + // getDigest call + void invalidateDigestCache() const; + + // get the calculated digest of the node + UInt64 getDigest(std::string_view path) const; + + // copy only necessary information for preprocessing and digest calculation + // (e.g. we don't need to copy list of children) + void shallowCopy(const KeeperMemNode & other); +private: + /// as ctime can't be negative because it stores the timestamp when the + /// node was created, we can use the MSB for a bool + struct + { + bool is_ephemeral : 1; + int64_t ctime : 63; + } is_ephemeral_and_ctime{false, 0}; + + /// ephemeral notes cannot have children so a node can set either + /// ephemeral_owner OR seq_num + num_children + union + { + int64_t ephemeral_owner; + struct + { + int32_t seq_num; + int32_t num_children; + } children_info; + } ephemeral_or_children_data{0}; + + ChildrenSet children{}; +}; + +class KeeperStorageBase +{ +public: enum DigestVersion : uint8_t { @@ -200,7 +396,11 @@ public: V4 = 4 // 0 is not a valid digest value }; - static constexpr auto CURRENT_DIGEST_VERSION = DigestVersion::V4; + struct Digest + { + DigestVersion version{DigestVersion::NO_DIGEST}; + uint64_t value{0}; + }; struct ResponseForSession { @@ -210,16 +410,6 @@ public: }; using ResponsesForSessions = std::vector; - struct Digest - { - DigestVersion version{DigestVersion::NO_DIGEST}; - uint64_t value{0}; - }; - - static bool checkDigest(const Digest & first, const Digest & second); - - static String generateDigest(const String & userdata); - struct RequestForSession { int64_t session_id; @@ -229,6 +419,7 @@ public: std::optional digest; int64_t log_idx{0}; }; + using RequestsForSessions = std::vector; struct AuthID { @@ -238,9 +429,6 @@ public: bool operator==(const AuthID & other) const { return scheme == other.scheme && id == other.id; } }; - using RequestsForSessions = std::vector; - - using Container = SnapshotableHashTable; using Ephemerals = std::unordered_map>; using SessionAndWatcher = std::unordered_map>; using SessionIDs = std::unordered_set; @@ -250,6 +438,38 @@ public: using SessionAndAuth = std::unordered_map; using Watches = std::unordered_map; + static bool checkDigest(const Digest & first, const Digest & second); + +}; + +/// Keeper state machine almost equal to the ZooKeeper's state machine. +/// Implements all logic of operations, data changes, sessions allocation. +/// In-memory and not thread safe. 
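The node structs introduced above pack their bookkeeping tightly: ctime is a non-negative timestamp, so its most significant bit doubles as the "is ephemeral" flag, and since an ephemeral node can never have children, the ephemeral owner shares its 8 bytes with {seq_num, num_children}. The following standalone sketch (illustrative names, not the real Keeper types) shows the same bit-field-plus-union layout and why it keeps this bookkeeping in two 64-bit words under GCC/Clang bit-field packing.

#include <cassert>
#include <cstdint>
#include <iostream>

/// Illustrative sketch of the layout trick used by the node structs above.
/// ctime can never be negative, so its MSB is borrowed for the ephemeral flag;
/// ephemeral nodes cannot have children, so the owner shares storage with
/// {seq_num, num_children}.
struct PackedNodeSketch
{
    struct
    {
        bool is_ephemeral : 1;
        int64_t ctime : 63;
    } is_ephemeral_and_ctime{false, 0};

    union
    {
        int64_t ephemeral_owner;
        struct
        {
            int32_t seq_num;
            int32_t num_children;
        } children_info;
    } ephemeral_or_children_data{0};

    void setEphemeralOwner(int64_t owner)
    {
        is_ephemeral_and_ctime.is_ephemeral = owner != 0;
        ephemeral_or_children_data.ephemeral_owner = owner;
    }

    int32_t numChildren() const
    {
        /// The union is interpreted as children_info only for non-ephemeral nodes.
        return is_ephemeral_and_ctime.is_ephemeral ? 0 : ephemeral_or_children_data.children_info.num_children;
    }
};

int main()
{
    PackedNodeSketch node;
    node.setEphemeralOwner(42);
    assert(node.numChildren() == 0);
    /// With GCC/Clang bit-field packing this prints 16: two 64-bit fields instead
    /// of the four separate fields (flag, ctime, owner, seq/children) a naive layout would need.
    std::cout << sizeof(PackedNodeSketch) << '\n';
}
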
+template +class KeeperStorage : public KeeperStorageBase +{ +public: + using Container = Container_; + using Node = Container::Node; + +#if !defined(ADDRESS_SANITIZER) && !defined(MEMORY_SANITIZER) + static_assert( + sizeof(ListNode) <= 144, + "std::list node containing ListNode is > 160 bytes (sizeof(ListNode) + 16 bytes for pointers) which will increase " + "memory consumption"); +#endif + + +#if USE_ROCKSDB + static constexpr bool use_rocksdb = std::is_same_v>; +#else + static constexpr bool use_rocksdb = false; +#endif + + static constexpr auto CURRENT_DIGEST_VERSION = DigestVersion::V4; + + static String generateDigest(const String & userdata); + int64_t session_id_counter{1}; SessionAndAuth session_and_auth; @@ -393,7 +613,7 @@ public: std::unordered_map, Hash, Equal> deltas_for_path; std::list deltas; - KeeperStorage & storage; + KeeperStorage & storage; }; UncommittedState uncommitted_state{*this}; @@ -530,10 +750,16 @@ public: /// Set of methods for creating snapshots /// Turn on snapshot mode, so data inside Container is not deleted, but replaced with new version. - void enableSnapshotMode(size_t up_to_version) { container.enableSnapshotMode(up_to_version); } + void enableSnapshotMode(size_t up_to_version) + { + container.enableSnapshotMode(up_to_version); + } /// Turn off snapshot mode. - void disableSnapshotMode() { container.disableSnapshotMode(); } + void disableSnapshotMode() + { + container.disableSnapshotMode(); + } Container::const_iterator getSnapshotIteratorBegin() const { return container.begin(); } @@ -572,6 +798,9 @@ private: void addDigest(const Node & node, std::string_view path); }; -using KeeperStoragePtr = std::unique_ptr; +using KeeperMemoryStorage = KeeperStorage>; +#if USE_ROCKSDB +using KeeperRocksStorage = KeeperStorage>; +#endif } diff --git a/src/Coordination/RocksDBContainer.h b/src/Coordination/RocksDBContainer.h new file mode 100644 index 00000000000..12b40bbb87e --- /dev/null +++ b/src/Coordination/RocksDBContainer.h @@ -0,0 +1,460 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ROCKSDB_ERROR; + extern const int LOGICAL_ERROR; +} + +/// The key-value format of rocks db will be +/// - key: Int8 (depth of the path) + String (path) +/// - value: SizeOf(keeperRocksNodeInfo) (meta of the node) + String (data) + +template +struct RocksDBContainer +{ + using Node = Node_; + +private: + /// MockNode is only use in test to mock `getChildren()` and `getData()` + struct MockNode + { + std::vector children; + std::string data; + MockNode(size_t children_num, std::string_view data_) + : children(std::vector(children_num)), + data(data_) + { + } + + std::vector getChildren() { return children; } + std::string getData() { return data; } + }; + + UInt16 getKeyDepth(const std::string & key) + { + UInt16 depth = 0; + for (size_t i = 0; i < key.size(); i++) + { + if (key[i] == '/' && i + 1 != key.size()) + depth ++; + } + return depth; + } + + std::string getEncodedKey(const std::string & key, bool child_prefix = false) + { + WriteBufferFromOwnString key_buffer; + UInt16 depth = getKeyDepth(key) + (child_prefix ? 
1 : 0); + writeIntBinary(depth, key_buffer); + writeString(key, key_buffer); + return key_buffer.str(); + } + + static std::string_view getDecodedKey(const std::string_view & key) + { + return std::string_view(key.begin() + 2, key.end()); + } + + + struct KVPair + { + StringRef key; + Node value; + }; + + using ValueUpdater = std::function; + +public: + + /// This is an iterator wrapping rocksdb iterator and the kv result. + struct const_iterator + { + std::shared_ptr iter; + + std::shared_ptr pair; + + const_iterator() = default; + + explicit const_iterator(std::shared_ptr pair_) : pair(std::move(pair_)) {} + + explicit const_iterator(rocksdb::Iterator * iter_) : iter(iter_) + { + updatePairFromIter(); + } + + const KVPair & operator * () const + { + return *pair; + } + + const KVPair * operator->() const + { + return pair.get(); + } + + bool operator != (const const_iterator & other) const + { + return !(*this == other); + } + + bool operator == (const const_iterator & other) const + { + if (pair == nullptr && other == nullptr) + return true; + if (pair == nullptr || other == nullptr) + return false; + return pair->key.toView() == other->key.toView() && iter == other.iter; + } + + bool operator == (std::nullptr_t) const + { + return iter == nullptr; + } + + bool operator != (std::nullptr_t) const + { + return iter != nullptr; + } + + explicit operator bool() const + { + return iter != nullptr; + } + + const_iterator & operator ++() + { + iter->Next(); + updatePairFromIter(); + return *this; + } + + private: + void updatePairFromIter() + { + if (iter && iter->Valid()) + { + auto new_pair = std::make_shared(); + new_pair->key = StringRef(getDecodedKey(iter->key().ToStringView())); + ReadBufferFromOwnString buffer(iter->value().ToStringView()); + typename Node::Meta & meta = new_pair->value; + readPODBinary(meta, buffer); + readVarUInt(new_pair->value.data_size, buffer); + if (new_pair->value.data_size) + { + new_pair->value.data = std::unique_ptr(new char[new_pair->value.data_size]); + buffer.readStrict(new_pair->value.data.get(), new_pair->value.data_size); + } + pair = new_pair; + } + else + { + pair = nullptr; + iter = nullptr; + } + } + }; + + bool initialized = false; + + const const_iterator end_ptr; + + void initialize(const KeeperContextPtr & context) + { + DiskPtr disk = context->getTemporaryRocksDBDisk(); + if (disk == nullptr) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get rocksdb disk"); + } + auto options = context->getRocksDBOptions(); + if (options == nullptr) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get rocksdb options"); + } + rocksdb_dir = disk->getPath(); + rocksdb::DB * db; + auto status = rocksdb::DB::Open(*options, rocksdb_dir, &db); + if (!status.ok()) + { + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Failed to open rocksdb path at: {}: {}", + rocksdb_dir, status.ToString()); + } + rocksdb_ptr = std::unique_ptr(db); + write_options.disableWAL = true; + initialized = true; + } + + ~RocksDBContainer() + { + if (initialized) + { + rocksdb_ptr->Close(); + rocksdb_ptr = nullptr; + + std::filesystem::remove_all(rocksdb_dir); + } + } + + std::vector> getChildren(const std::string & key_) + { + rocksdb::ReadOptions read_options; + read_options.total_order_seek = true; + + std::string key = key_; + if (!key.ends_with('/')) + key += '/'; + size_t len = key.size() + 2; + + auto iter = std::unique_ptr(rocksdb_ptr->NewIterator(read_options)); + std::string encoded_string = getEncodedKey(key, true); + rocksdb::Slice prefix(encoded_string); + 
std::vector> result; + for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); iter->Next()) + { + Node node; + ReadBufferFromOwnString buffer(iter->value().ToStringView()); + typename Node::Meta & meta = node; + /// We do not read data here + readPODBinary(meta, buffer); + std::string real_key(iter->key().data() + len, iter->key().size() - len); + // std::cout << "real key: " << real_key << std::endl; + result.emplace_back(std::move(real_key), std::move(node)); + } + + return result; + } + + bool contains(const std::string & path) + { + const std::string & encoded_key = getEncodedKey(path); + std::string buffer_str; + rocksdb::Status status = rocksdb_ptr->Get(rocksdb::ReadOptions(), encoded_key, &buffer_str); + if (status.IsNotFound()) + return false; + if (!status.ok()) + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Got rocksdb error during executing contains. The error message is {}.", status.ToString()); + return true; + } + + const_iterator find(StringRef key_) + { + /// rocksdb::PinnableSlice slice; + const std::string & encoded_key = getEncodedKey(key_.toString()); + std::string buffer_str; + rocksdb::Status status = rocksdb_ptr->Get(rocksdb::ReadOptions(), encoded_key, &buffer_str); + if (status.IsNotFound()) + return end(); + if (!status.ok()) + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Got rocksdb error during executing find. The error message is {}.", status.ToString()); + ReadBufferFromOwnString buffer(buffer_str); + auto kv = std::make_shared(); + kv->key = key_; + typename Node::Meta & meta = kv->value; + readPODBinary(meta, buffer); + /// TODO: Sometimes we don't need to load data. + readVarUInt(kv->value.data_size, buffer); + if (kv->value.data_size) + { + kv->value.data = std::unique_ptr(new char[kv->value.data_size]); + buffer.readStrict(kv->value.data.get(), kv->value.data_size); + } + return const_iterator(kv); + } + + MockNode getValue(StringRef key) + { + auto it = find(key); + chassert(it != end()); + return MockNode(it->value.numChildren(), it->value.getData()); + } + + const_iterator updateValue(StringRef key_, ValueUpdater updater) + { + /// rocksdb::PinnableSlice slice; + const std::string & key = key_.toString(); + const std::string & encoded_key = getEncodedKey(key); + std::string buffer_str; + rocksdb::Status status = rocksdb_ptr->Get(rocksdb::ReadOptions(), encoded_key, &buffer_str); + if (!status.ok()) + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Got rocksdb error during find. The error message is {}.", status.ToString()); + auto kv = std::make_shared(); + kv->key = key_; + kv->value.decodeFromString(buffer_str); + /// storage->removeDigest(node, key); + updater(kv->value); + insertOrReplace(key, kv->value); + return const_iterator(kv); + } + + bool insert(const std::string & key, Node & value) + { + std::string value_str; + const std::string & encoded_key = getEncodedKey(key); + rocksdb::Status status = rocksdb_ptr->Get(rocksdb::ReadOptions(), encoded_key, &value_str); + if (status.ok()) + { + return false; + } + else if (status.IsNotFound()) + { + status = rocksdb_ptr->Put(write_options, encoded_key, value.getEncodedString()); + if (status.ok()) + { + counter++; + return true; + } + } + + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Got rocksdb error during insert. 
The error message is {}.", status.ToString()); + } + + void insertOrReplace(const std::string & key, Node & value) + { + const std::string & encoded_key = getEncodedKey(key); + /// storage->addDigest(value, key); + std::string value_str; + rocksdb::Status status = rocksdb_ptr->Get(rocksdb::ReadOptions(), encoded_key, &value_str); + bool increase_counter = false; + if (status.IsNotFound()) + increase_counter = true; + else if (!status.ok()) + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Got rocksdb error during get. The error message is {}.", status.ToString()); + + status = rocksdb_ptr->Put(write_options, encoded_key, value.getEncodedString()); + if (status.ok()) + counter += increase_counter; + else + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Got rocksdb error during insert. The error message is {}.", status.ToString()); + } + + using KeyPtr = std::unique_ptr; + + /// To be compatible with SnapshotableHashTable, will remove later; + KeyPtr allocateKey(size_t size) + { + return KeyPtr{new char[size]}; + } + + void insertOrReplace(KeyPtr key_data, size_t key_size, Node value) + { + std::string key(key_data.get(), key_size); + insertOrReplace(key, value); + } + + bool erase(const std::string & key) + { + /// storage->removeDigest(value, key); + const std::string & encoded_key = getEncodedKey(key); + + auto status = rocksdb_ptr->Delete(write_options, encoded_key); + if (status.IsNotFound()) + return false; + if (status.ok()) + { + counter--; + return true; + } + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Got rocksdb error during erase. The error message is {}.", status.ToString()); + } + + void recalculateDataSize() {} + void reverse(size_t size_) {(void)size_;} + + uint64_t getApproximateDataSize() const + { + /// use statistics from rocksdb + return counter * sizeof(Node); + } + + void enableSnapshotMode(size_t version) + { + chassert(!snapshot_mode); + snapshot_mode = true; + snapshot_up_to_version = version; + snapshot_size = counter; + ++current_version; + + snapshot = rocksdb_ptr->GetSnapshot(); + } + + void disableSnapshotMode() + { + chassert(snapshot_mode); + snapshot_mode = false; + rocksdb_ptr->ReleaseSnapshot(snapshot); + } + + void clearOutdatedNodes() {} + + std::pair snapshotSizeWithVersion() const + { + if (!snapshot_mode) + return std::make_pair(counter, current_version); + else + return std::make_pair(snapshot_size, current_version); + } + + const_iterator begin() const + { + rocksdb::ReadOptions read_options; + read_options.total_order_seek = true; + if (snapshot_mode) + read_options.snapshot = snapshot; + auto * iter = rocksdb_ptr->NewIterator(read_options); + iter->SeekToFirst(); + return const_iterator(iter); + } + + const_iterator end() const + { + return end_ptr; + } + + size_t size() const + { + return counter; + } + + uint64_t getArenaDataSize() const + { + return 0; + } + + uint64_t keyArenaSize() const + { + return 0; + } + +private: + String rocksdb_dir; + + std::unique_ptr rocksdb_ptr; + rocksdb::WriteOptions write_options; + + const rocksdb::Snapshot * snapshot; + + bool snapshot_mode{false}; + size_t current_version{0}; + size_t snapshot_up_to_version{0}; + size_t snapshot_size{0}; + size_t counter{0}; + +}; + +} diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h index 5f2b14e17b0..85452558496 100644 --- a/src/Coordination/SnapshotableHashTable.h +++ b/src/Coordination/SnapshotableHashTable.h @@ -212,9 +212,9 @@ private: updateDataSize(INSERT_OR_REPLACE, key.size, new_value_size, old_value_size, !snapshot_mode); } 
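RocksDBContainer above stores each node under a key made of a 2-byte path depth followed by the path itself, so every direct child of a node shares one contiguous key prefix (parent depth + 1, parent path ending in '/'), and getChildren reduces to a single ordered prefix scan. Below is a minimal sketch of that encoding; it uses std::map as a stand-in for RocksDB's sorted key space, and the helper names are illustrative rather than the real ones.

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

/// Depth of a path: count '/' separators, ignoring a trailing slash.
static uint16_t keyDepth(const std::string & path)
{
    uint16_t depth = 0;
    for (size_t i = 0; i < path.size(); ++i)
        if (path[i] == '/' && i + 1 != path.size())
            ++depth;
    return depth;
}

/// Encoded key = 2-byte depth + path. With child_prefix = true the depth is
/// bumped by one so the result matches the keys of direct children only.
static std::string encodeKey(const std::string & path, bool child_prefix = false)
{
    uint16_t depth = keyDepth(path) + (child_prefix ? 1 : 0);
    return std::string(reinterpret_cast<const char *>(&depth), sizeof(depth)) + path;
}

int main()
{
    std::map<std::string, std::string> kv; /// stand-in for the sorted RocksDB key space
    for (const char * path : {"/", "/a", "/b", "/a/x", "/a/y", "/a/x/deep"})
        kv[encodeKey(path)] = std::string("data of ") + path;

    /// List direct children of "/a": seek to the child prefix, scan while it matches.
    const std::string prefix = encodeKey("/a/", /*child_prefix=*/true);
    for (auto it = kv.lower_bound(prefix); it != kv.end() && it->first.compare(0, prefix.size(), prefix) == 0; ++it)
        std::cout << it->first.substr(prefix.size()) << '\n'; /// prints "x" and "y", but not "deep" or "b"
}
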
- public: + using Node = V; using iterator = typename List::iterator; using const_iterator = typename List::const_iterator; using ValueUpdater = std::function; @@ -364,6 +364,7 @@ public: { auto map_it = map.find(key); if (map_it != map.end()) + /// return std::make_shared(KVPair{map_it->getMapped()->key, map_it->getMapped()->value}); return map_it->getMapped(); return list.end(); } diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index c205db942b9..99d71b85e78 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -43,7 +43,8 @@ void deserializeSnapshotMagic(ReadBuffer & in) throw Exception(ErrorCodes::CORRUPTED_DATA, "Incorrect magic header in file, expected {}, got {}", SNP_HEADER, magic_header); } -int64_t deserializeSessionAndTimeout(KeeperStorage & storage, ReadBuffer & in) +template +int64_t deserializeSessionAndTimeout(Storage & storage, ReadBuffer & in) { int32_t count; Coordination::read(count, in); @@ -62,7 +63,8 @@ int64_t deserializeSessionAndTimeout(KeeperStorage & storage, ReadBuffer & in) return max_session_id; } -void deserializeACLMap(KeeperStorage & storage, ReadBuffer & in) +template +void deserializeACLMap(Storage & storage, ReadBuffer & in) { int32_t count; Coordination::read(count, in); @@ -90,7 +92,8 @@ void deserializeACLMap(KeeperStorage & storage, ReadBuffer & in) } } -int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, LoggerPtr log) +template +int64_t deserializeStorageData(Storage & storage, ReadBuffer & in, LoggerPtr log) { int64_t max_zxid = 0; std::string path; @@ -98,7 +101,7 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, LoggerP size_t count = 0; while (path != "/") { - KeeperStorage::Node node{}; + typename Storage::Node node{}; String data; Coordination::read(data, in); node.setData(data); @@ -146,14 +149,15 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, LoggerP if (itr.key != "/") { auto parent_path = parentNodePath(itr.key); - storage.container.updateValue(parent_path, [my_path = itr.key] (KeeperStorage::Node & value) { value.addChild(getBaseNodeName(my_path)); value.increaseNumChildren(); }); + storage.container.updateValue(parent_path, [my_path = itr.key] (typename Storage::Node & value) { value.addChild(getBaseNodeName(my_path)); value.increaseNumChildren(); }); } } return max_zxid; } -void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::string & snapshot_path, LoggerPtr log) +template +void deserializeKeeperStorageFromSnapshot(Storage & storage, const std::string & snapshot_path, LoggerPtr log) { LOG_INFO(log, "Deserializing storage snapshot {}", snapshot_path); int64_t zxid = getZxidFromName(snapshot_path); @@ -192,9 +196,11 @@ void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::st LOG_INFO(log, "Finished, snapshot ZXID {}", storage.zxid); } -void deserializeKeeperStorageFromSnapshotsDir(KeeperStorage & storage, const std::string & path, LoggerPtr log) +namespace fs = std::filesystem; + +template +void deserializeKeeperStorageFromSnapshotsDir(Storage & storage, const std::string & path, LoggerPtr log) { - namespace fs = std::filesystem; std::map existing_snapshots; for (const auto & p : fs::directory_iterator(path)) { @@ -480,7 +486,8 @@ bool hasErrorsInMultiRequest(Coordination::ZooKeeperRequestPtr request) } -bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in, LoggerPtr /*log*/) +template +bool 
deserializeTxn(Storage & storage, ReadBuffer & in, LoggerPtr /*log*/) { int64_t checksum; Coordination::read(checksum, in); @@ -535,7 +542,8 @@ bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in, LoggerPtr /*log*/) return true; } -void deserializeLogAndApplyToStorage(KeeperStorage & storage, const std::string & log_path, LoggerPtr log) +template +void deserializeLogAndApplyToStorage(Storage & storage, const std::string & log_path, LoggerPtr log) { ReadBufferFromFile reader(log_path); @@ -559,9 +567,9 @@ void deserializeLogAndApplyToStorage(KeeperStorage & storage, const std::string LOG_INFO(log, "Finished {} deserialization, totally read {} records", log_path, counter); } -void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string & path, LoggerPtr log) +template +void deserializeLogsAndApplyToStorage(Storage & storage, const std::string & path, LoggerPtr log) { - namespace fs = std::filesystem; std::map existing_logs; for (const auto & p : fs::directory_iterator(path)) { @@ -595,4 +603,9 @@ void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string } } +template void deserializeKeeperStorageFromSnapshot(KeeperMemoryStorage & storage, const std::string & snapshot_path, LoggerPtr log); +template void deserializeKeeperStorageFromSnapshotsDir(KeeperMemoryStorage & storage, const std::string & path, LoggerPtr log); +template void deserializeLogAndApplyToStorage(KeeperMemoryStorage & storage, const std::string & log_path, LoggerPtr log); +template void deserializeLogsAndApplyToStorage(KeeperMemoryStorage & storage, const std::string & path, LoggerPtr log); + } diff --git a/src/Coordination/ZooKeeperDataReader.h b/src/Coordination/ZooKeeperDataReader.h index 648dc95adcf..5520d1d3215 100644 --- a/src/Coordination/ZooKeeperDataReader.h +++ b/src/Coordination/ZooKeeperDataReader.h @@ -5,12 +5,16 @@ namespace DB { -void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::string & snapshot_path, LoggerPtr log); +template +void deserializeKeeperStorageFromSnapshot(Storage & storage, const std::string & snapshot_path, LoggerPtr log); -void deserializeKeeperStorageFromSnapshotsDir(KeeperStorage & storage, const std::string & path, LoggerPtr log); +template +void deserializeKeeperStorageFromSnapshotsDir(Storage & storage, const std::string & path, LoggerPtr log); -void deserializeLogAndApplyToStorage(KeeperStorage & storage, const std::string & log_path, LoggerPtr log); +template +void deserializeLogAndApplyToStorage(Storage & storage, const std::string & log_path, LoggerPtr log); -void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string & path, LoggerPtr log); +template +void deserializeLogsAndApplyToStorage(Storage & storage, const std::string & path, LoggerPtr log); } diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index a329bec8e2a..beae6254562 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -60,10 +61,22 @@ struct CompressionParam std::string extension; }; -class CoordinationTest : public ::testing::TestWithParam +template +struct TestParam { -protected: - DB::KeeperContextPtr keeper_context = std::make_shared(true, std::make_shared()); + using Storage = Storage_; + static constexpr bool enable_compression = enable_compression_; +}; + +template +class CoordinationTest : public ::testing::Test +{ 
+public: + using Storage = typename TestType::Storage; + static constexpr bool enable_compression = TestType::enable_compression; + std::string extension; + + DB::KeeperContextPtr keeper_context; LoggerPtr log{getLogger("CoordinationTest")}; void SetUp() override @@ -72,7 +85,12 @@ protected: Poco::Logger::root().setChannel(channel); Poco::Logger::root().setLevel("trace"); + auto settings = std::make_shared(); + settings->experimental_use_rocksdb = true; + keeper_context = std::make_shared(true, settings); keeper_context->setLocalLogsPreprocessed(); + keeper_context->setRocksDBOptions(); + extension = enable_compression ? ".zstd" : ""; } void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path)); } @@ -82,13 +100,24 @@ protected: keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path)); } + void setRocksDBDirectory(const std::string & path) + { + keeper_context->setRocksDBDisk(std::make_shared("RocksDisk", path)); + } + void setStateFileDirectory(const std::string & path) { keeper_context->setStateFileDisk(std::make_shared("StateFile", path)); } }; -TEST_P(CoordinationTest, RaftServerConfigParse) +using Implementation = testing::Types, + TestParam, + TestParam, + TestParam>; +TYPED_TEST_SUITE(CoordinationTest, Implementation); + +TYPED_TEST(CoordinationTest, RaftServerConfigParse) { auto parse = Coordination::RaftServerConfig::parse; using Cfg = std::optional; @@ -113,7 +142,7 @@ TEST_P(CoordinationTest, RaftServerConfigParse) (Cfg{{1, "2001:0db8:85a3:0000:0000:8a2e:0370:7334:80"}})); } -TEST_P(CoordinationTest, RaftServerClusterConfigParse) +TYPED_TEST(CoordinationTest, RaftServerClusterConfigParse) { auto parse = Coordination::parseRaftServers; using Cfg = DB::RaftServerConfig; @@ -129,14 +158,14 @@ TEST_P(CoordinationTest, RaftServerClusterConfigParse) (Servers{Cfg{1, "host:80"}, Cfg{2, "host:81"}})); } -TEST_P(CoordinationTest, BuildTest) +TYPED_TEST(CoordinationTest, BuildTest) { DB::InMemoryLogStore store; DB::SummingStateMachine machine; EXPECT_EQ(1, 1); } -TEST_P(CoordinationTest, BufferSerde) +TYPED_TEST(CoordinationTest, BufferSerde) { Coordination::ZooKeeperRequestPtr request = Coordination::ZooKeeperRequestFactory::instance().get(Coordination::OpNum::Get); request->xid = 3; @@ -260,13 +289,13 @@ nuraft::ptr getBuffer(int64_t number) return ret; } -TEST_P(CoordinationTest, TestSummingRaft1) +TYPED_TEST(CoordinationTest, TestSummingRaft1) { ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); - setStateFileDirectory("."); + this->setLogDirectory("./logs"); + this->setStateFileDirectory("."); - SummingRaftServer s1(1, "localhost", 44444, keeper_context); + SummingRaftServer s1(1, "localhost", 44444, this->keeper_context); SCOPE_EXIT(if (std::filesystem::exists("./state")) std::filesystem::remove("./state");); /// Single node is leader @@ -279,7 +308,7 @@ TEST_P(CoordinationTest, TestSummingRaft1) while (s1.state_machine->getValue() != 143) { - LOG_INFO(log, "Waiting s1 to apply entry"); + LOG_INFO(this->log, "Waiting s1 to apply entry"); std::this_thread::sleep_for(std::chrono::milliseconds(100)); } @@ -295,16 +324,16 @@ DB::LogEntryPtr getLogEntry(const std::string & s, size_t term) return nuraft::cs_new(term, bufwriter.getBuffer()); } -TEST_P(CoordinationTest, ChangelogTestSimple) +TYPED_TEST(CoordinationTest, ChangelogTestSimple) { - auto params = GetParam(); + /// ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - 
DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello world", 77); changelog.append(entry); @@ -327,16 +356,15 @@ void waitDurableLogs(nuraft::log_store & log_store) } -TEST_P(CoordinationTest, ChangelogTestFile) +TYPED_TEST(CoordinationTest, ChangelogTestFile) { - auto params = GetParam(); ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello world", 77); changelog.append(entry); @@ -344,9 +372,9 @@ TEST_P(CoordinationTest, ChangelogTestFile) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); for (const auto & p : fs::directory_iterator("./logs")) - EXPECT_EQ(p.path(), "./logs/changelog_1_5.bin" + params.extension); + EXPECT_EQ(p.path(), "./logs/changelog_1_5.bin" + this->extension); changelog.append(entry); changelog.append(entry); @@ -357,20 +385,20 @@ TEST_P(CoordinationTest, ChangelogTestFile) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); } -TEST_P(CoordinationTest, ChangelogReadWrite) +TYPED_TEST(CoordinationTest, ChangelogReadWrite) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 1000}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) @@ -385,9 +413,9 @@ TEST_P(CoordinationTest, ChangelogReadWrite) waitDurableLogs(changelog); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 1000}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 10); EXPECT_EQ(changelog_reader.last_entry()->get_term(), changelog.last_entry()->get_term()); @@ -403,16 +431,16 @@ TEST_P(CoordinationTest, ChangelogReadWrite) EXPECT_EQ(10, entries_from_range->size()); } -TEST_P(CoordinationTest, ChangelogWriteAt) +TYPED_TEST(CoordinationTest, ChangelogWriteAt) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = 
params.enable_compression, .rotate_interval = 1000}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 1000}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) { @@ -435,9 +463,9 @@ TEST_P(CoordinationTest, ChangelogWriteAt) EXPECT_EQ(changelog.next_slot(), 8); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1000}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 1000}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), changelog.size()); @@ -447,16 +475,16 @@ TEST_P(CoordinationTest, ChangelogWriteAt) } -TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) +TYPED_TEST(CoordinationTest, ChangelogTestAppendAfterRead) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 7; ++i) { @@ -469,13 +497,13 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 7); @@ -488,8 +516,8 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) EXPECT_EQ(changelog_reader.size(), 10); waitDurableLogs(changelog_reader); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); size_t logs_count = 0; for (const auto & _ [[maybe_unused]] : fs::directory_iterator("./logs")) @@ -504,9 +532,9 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) waitDurableLogs(changelog_reader); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); logs_count = 0; for (const auto & _ [[maybe_unused]] : fs::directory_iterator("./logs")) @@ -533,16 +561,16 @@ void assertFileDeleted(std::string path) } -TEST_P(CoordinationTest, 
ChangelogTestCompaction) +TYPED_TEST(CoordinationTest, ChangelogTestCompaction) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 3; ++i) @@ -556,7 +584,7 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) EXPECT_EQ(changelog.size(), 3); - keeper_context->setLastCommitIndex(2); + this->keeper_context->setLastCommitIndex(2); changelog.compact(2); EXPECT_EQ(changelog.size(), 1); @@ -564,7 +592,7 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) EXPECT_EQ(changelog.next_slot(), 4); EXPECT_EQ(changelog.last_entry()->get_term(), 20); // nothing should be deleted - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); auto e1 = getLogEntry("hello world", 30); changelog.append(e1); @@ -578,15 +606,15 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); - keeper_context->setLastCommitIndex(6); + this->keeper_context->setLastCommitIndex(6); changelog.compact(6); std::this_thread::sleep_for(std::chrono::microseconds(1000)); - assertFileDeleted("./logs/changelog_1_5.bin" + params.extension); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); + assertFileDeleted("./logs/changelog_1_5.bin" + this->extension); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); EXPECT_EQ(changelog.size(), 1); EXPECT_EQ(changelog.start_index(), 7); @@ -594,9 +622,9 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) EXPECT_EQ(changelog.last_entry()->get_term(), 60); /// And we able to read it DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(7, 0); EXPECT_EQ(changelog_reader.size(), 1); @@ -605,16 +633,16 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) EXPECT_EQ(changelog_reader.last_entry()->get_term(), 60); } -TEST_P(CoordinationTest, ChangelogTestBatchOperations) +TYPED_TEST(CoordinationTest, ChangelogTestBatchOperations) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) { @@ -630,9 +658,9 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperations) auto entries = changelog.pack(1, 5); DB::KeeperLogStore 
apply_changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); apply_changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) @@ -660,18 +688,18 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperations) EXPECT_EQ(apply_changelog.entry_at(12)->get_term(), 40); } -TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty) +TYPED_TEST(CoordinationTest, ChangelogTestBatchOperationsEmpty) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); nuraft::ptr entries; { DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 10; ++i) { @@ -688,11 +716,11 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty) } ChangelogDirTest test1("./logs1"); - setLogDirectory("./logs1"); + this->setLogDirectory("./logs1"); DB::KeeperLogStore changelog_new( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_new.init(1, 0); EXPECT_EQ(changelog_new.size(), 0); @@ -715,23 +743,23 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty) EXPECT_EQ(changelog_new.next_slot(), 11); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(5, 0); } -TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile) +TYPED_TEST(CoordinationTest, ChangelogTestWriteAtPreviousFile) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 33; ++i) @@ -743,13 +771,13 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + 
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); EXPECT_EQ(changelog.size(), 33); @@ -763,19 +791,19 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); DB::KeeperLogStore changelog_read( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_read.init(1, 0); EXPECT_EQ(changelog_read.size(), 7); EXPECT_EQ(changelog_read.start_index(), 1); @@ -783,16 +811,16 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile) EXPECT_EQ(changelog_read.last_entry()->get_term(), 5555); } -TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder) +TYPED_TEST(CoordinationTest, ChangelogTestWriteAtFileBorder) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 33; ++i) @@ -804,13 +832,13 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + 
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); EXPECT_EQ(changelog.size(), 33); @@ -824,19 +852,19 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); DB::KeeperLogStore changelog_read( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_read.init(1, 0); EXPECT_EQ(changelog_read.size(), 11); EXPECT_EQ(changelog_read.start_index(), 1); @@ -844,16 +872,16 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder) EXPECT_EQ(changelog_read.last_entry()->get_term(), 5555); } -TEST_P(CoordinationTest, ChangelogTestWriteAtAllFiles) +TYPED_TEST(CoordinationTest, ChangelogTestWriteAtAllFiles) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 33; ++i) { @@ -864,13 +892,13 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtAllFiles) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + 
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); EXPECT_EQ(changelog.size(), 33); @@ -884,26 +912,26 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtAllFiles) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); } -TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead) +TYPED_TEST(CoordinationTest, ChangelogTestStartNewLogAfterRead) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -915,19 +943,19 @@ TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead) EXPECT_EQ(changelog.size(), 35); waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./logs/changelog_36_40.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); + 
EXPECT_FALSE(fs::exists("./logs/changelog_36_40.bin" + this->extension)); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(1, 0); auto entry = getLogEntry("36_hello_world", 360); @@ -937,14 +965,14 @@ TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead) EXPECT_EQ(changelog_reader.size(), 36); waitDurableLogs(changelog_reader); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_36_40.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_36_40.bin" + this->extension)); } namespace @@ -965,18 +993,18 @@ void assertBrokenFileRemoved(const fs::path & directory, const fs::path & filena } -TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) +TYPED_TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) { static const fs::path log_folder{"./logs"}; - auto params = GetParam(); + ChangelogDirTest test(log_folder); - setLogDirectory(log_folder); + this->setLogDirectory(log_folder); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -988,36 +1016,36 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) EXPECT_EQ(changelog.size(), 35); waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); + 
EXPECT_TRUE(fs::exists("./logs/changelog_16_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_25.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_26_30.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + this->extension)); DB::WriteBufferFromFile plain_buf( - "./logs/changelog_11_15.bin" + params.extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); + "./logs/changelog_11_15.bin" + this->extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); plain_buf.truncate(0); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(1, 0); changelog_reader.end_of_append_batch(0, 0); EXPECT_EQ(changelog_reader.size(), 10); EXPECT_EQ(changelog_reader.last_entry()->get_term(), 90); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); - assertBrokenFileRemoved(log_folder, "changelog_16_20.bin" + params.extension); - assertBrokenFileRemoved(log_folder, "changelog_21_25.bin" + params.extension); - assertBrokenFileRemoved(log_folder, "changelog_26_30.bin" + params.extension); - assertBrokenFileRemoved(log_folder, "changelog_31_35.bin" + params.extension); + assertBrokenFileRemoved(log_folder, "changelog_16_20.bin" + this->extension); + assertBrokenFileRemoved(log_folder, "changelog_21_25.bin" + this->extension); + assertBrokenFileRemoved(log_folder, "changelog_26_30.bin" + this->extension); + assertBrokenFileRemoved(log_folder, "changelog_31_35.bin" + this->extension); auto entry = getLogEntry("h", 7777); changelog_reader.append(entry); @@ -1027,35 +1055,35 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) waitDurableLogs(changelog_reader); - EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + this->extension)); - assertBrokenFileRemoved(log_folder, "changelog_16_20.bin" + params.extension); - assertBrokenFileRemoved(log_folder, "changelog_21_25.bin" + params.extension); - assertBrokenFileRemoved(log_folder, "changelog_26_30.bin" + params.extension); - assertBrokenFileRemoved(log_folder, "changelog_31_35.bin" + params.extension); + assertBrokenFileRemoved(log_folder, "changelog_16_20.bin" + this->extension); + assertBrokenFileRemoved(log_folder, "changelog_21_25.bin" + this->extension); + assertBrokenFileRemoved(log_folder, "changelog_26_30.bin" + this->extension); + assertBrokenFileRemoved(log_folder, "changelog_31_35.bin" + this->extension); DB::KeeperLogStore changelog_reader2( - DB::LogFileSettings{.force_sync = true, .compress_logs = 
params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader2.init(1, 0); EXPECT_EQ(changelog_reader2.size(), 11); EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777); } /// Truncating all entries -TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) +TYPED_TEST(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -1066,22 +1094,22 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) changelog.end_of_append_batch(0, 0); waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin" + this->extension)); DB::WriteBufferFromFile plain_buf( - "./logs/changelog_1_20.bin" + params.extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); + "./logs/changelog_1_20.bin" + this->extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); plain_buf.truncate(30); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 0); - EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + params.extension)); - assertBrokenFileRemoved("./logs", "changelog_21_40.bin" + params.extension); + EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + this->extension)); + assertBrokenFileRemoved("./logs", "changelog_21_40.bin" + this->extension); auto entry = getLogEntry("hello_world", 7777); changelog_reader.append(entry); changelog_reader.end_of_append_batch(0, 0); @@ -1092,9 +1120,9 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777); DB::KeeperLogStore changelog_reader2( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 1}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 1}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader2.init(1, 0); EXPECT_EQ(changelog_reader2.size(), 1); EXPECT_EQ(changelog_reader2.last_entry()->get_term(), 7777); @@ -1103,15 +1131,15 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) /// Truncating only some entries from the end /// For compressed logs we have no reliable way of knowing how many log entries were lost /// after we truncate some bytes from the end -TEST_F(CoordinationTest, ChangelogTestReadAfterBrokenTruncate3) +TYPED_TEST(CoordinationTest, 
ChangelogTestReadAfterBrokenTruncate3) { ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( DB::LogFileSettings{.force_sync = true, .compress_logs = false, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -1133,7 +1161,7 @@ TEST_F(CoordinationTest, ChangelogTestReadAfterBrokenTruncate3) DB::KeeperLogStore changelog_reader( DB::LogFileSettings{.force_sync = true, .compress_logs = false, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 19); @@ -1150,10 +1178,10 @@ TEST_F(CoordinationTest, ChangelogTestReadAfterBrokenTruncate3) EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777); } -TEST_F(CoordinationTest, ChangelogTestMixedLogTypes) +TYPED_TEST(CoordinationTest, ChangelogTestMixedLogTypes) { ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); std::vector changelog_files; @@ -1185,7 +1213,7 @@ TEST_F(CoordinationTest, ChangelogTestMixedLogTypes) DB::KeeperLogStore changelog( DB::LogFileSettings{.force_sync = true, .compress_logs = false, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -1206,7 +1234,7 @@ TEST_F(CoordinationTest, ChangelogTestMixedLogTypes) DB::KeeperLogStore changelog_compressed( DB::LogFileSettings{.force_sync = true, .compress_logs = true, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_compressed.init(1, 0); verify_changelog_files(); @@ -1228,7 +1256,7 @@ TEST_F(CoordinationTest, ChangelogTestMixedLogTypes) DB::KeeperLogStore changelog( DB::LogFileSettings{.force_sync = true, .compress_logs = false, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); verify_changelog_files(); @@ -1246,16 +1274,16 @@ TEST_F(CoordinationTest, ChangelogTestMixedLogTypes) } } -TEST_P(CoordinationTest, ChangelogTestLostFiles) +TYPED_TEST(CoordinationTest, ChangelogTestLostFiles) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 20}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -1266,30 +1294,30 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles) changelog.end_of_append_batch(0, 0); waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin" + this->extension)); - fs::remove("./logs/changelog_1_20.bin" + params.extension); + fs::remove("./logs/changelog_1_20.bin" + this->extension); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 20}, 
DB::FlushSettings(), - keeper_context); + this->keeper_context); /// It should print error message, but still able to start changelog_reader.init(5, 0); - assertBrokenFileRemoved("./logs", "changelog_21_40.bin" + params.extension); + assertBrokenFileRemoved("./logs", "changelog_21_40.bin" + this->extension); } -TEST_P(CoordinationTest, ChangelogTestLostFiles2) +TYPED_TEST(CoordinationTest, ChangelogTestLostFiles2) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 10}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t i = 0; i < 35; ++i) @@ -1301,24 +1329,24 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles2) waitDurableLogs(changelog); - EXPECT_TRUE(fs::exists("./logs/changelog_1_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_20.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_21_30.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_31_40.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_20.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_21_30.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_31_40.bin" + this->extension)); // we have a gap in our logs, we need to remove all the logs after the gap - fs::remove("./logs/changelog_21_30.bin" + params.extension); + fs::remove("./logs/changelog_21_30.bin" + this->extension); DB::KeeperLogStore changelog_reader( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 10}, DB::FlushSettings(), - keeper_context); + this->keeper_context); /// It should print error message, but still able to start changelog_reader.init(5, 0); - EXPECT_TRUE(fs::exists("./logs/changelog_1_10.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_11_20.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_10.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_11_20.bin" + this->extension)); - assertBrokenFileRemoved("./logs", "changelog_31_40.bin" + params.extension); + assertBrokenFileRemoved("./logs", "changelog_31_40.bin" + this->extension); } struct IntNode { @@ -1334,7 +1362,7 @@ struct IntNode bool operator!=(const int & rhs) const { return rhs != this->value; } }; -TEST_P(CoordinationTest, SnapshotableHashMapSimple) +TYPED_TEST(CoordinationTest, SnapshotableHashMapSimple) { DB::SnapshotableHashTable hello; EXPECT_TRUE(hello.insert("hello", 5).second); @@ -1349,7 +1377,7 @@ TEST_P(CoordinationTest, SnapshotableHashMapSimple) EXPECT_EQ(hello.size(), 0); } -TEST_P(CoordinationTest, SnapshotableHashMapTrySnapshot) +TYPED_TEST(CoordinationTest, SnapshotableHashMapTrySnapshot) { DB::SnapshotableHashTable map_snp; EXPECT_TRUE(map_snp.insert("/hello", 7).second); @@ -1426,7 +1454,7 @@ TEST_P(CoordinationTest, SnapshotableHashMapTrySnapshot) map_snp.disableSnapshotMode(); } -TEST_P(CoordinationTest, SnapshotableHashMapDataSize) +TYPED_TEST(CoordinationTest, SnapshotableHashMapDataSize) { /// int 
DB::SnapshotableHashTable hello; @@ -1464,7 +1492,7 @@ TEST_P(CoordinationTest, SnapshotableHashMapDataSize) EXPECT_EQ(hello.getApproximateDataSize(), 0); /// Node - using Node = DB::KeeperStorage::Node; + using Node = DB::KeeperMemoryStorage::Node; DB::SnapshotableHashTable world; Node n1; n1.setData("1234"); @@ -1503,9 +1531,10 @@ TEST_P(CoordinationTest, SnapshotableHashMapDataSize) EXPECT_EQ(world.getApproximateDataSize(), 0); } -void addNode(DB::KeeperStorage & storage, const std::string & path, const std::string & data, int64_t ephemeral_owner = 0) +template +void addNode(Storage & storage, const std::string & path, const std::string & data, int64_t ephemeral_owner = 0) { - using Node = DB::KeeperStorage::Node; + using Node = typename Storage::Node; Node node{}; node.setData(data); node.setEphemeralOwner(ephemeral_owner); @@ -1521,15 +1550,20 @@ void addNode(DB::KeeperStorage & storage, const std::string & path, const std::s }); } -TEST_P(CoordinationTest, TestStorageSnapshotSimple) +TYPED_TEST(CoordinationTest, TestStorageSnapshotSimple) { - auto params = GetParam(); + ChangelogDirTest test("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); - DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); + using Storage = typename TestFixture::Storage; - DB::KeeperStorage storage(500, "", keeper_context); + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + DB::KeeperSnapshotManager manager(3, this->keeper_context, this->enable_compression); + + Storage storage(500, "", this->keeper_context); addNode(storage, "/hello1", "world", 1); addNode(storage, "/hello2", "somedata", 3); storage.session_id_counter = 5; @@ -1539,7 +1573,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotSimple) storage.getSessionID(130); storage.getSessionID(130); - DB::KeeperStorageSnapshot snapshot(&storage, 2); + DB::KeeperStorageSnapshot snapshot(&storage, 2); EXPECT_EQ(snapshot.snapshot_meta->get_last_log_idx(), 2); EXPECT_EQ(snapshot.session_id, 7); @@ -1548,7 +1582,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotSimple) auto buf = manager.serializeSnapshotToBuffer(snapshot); manager.serializeSnapshotBufferToDisk(*buf, 2); - EXPECT_TRUE(fs::exists("./snapshots/snapshot_2.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_2.bin" + this->extension)); auto debuf = manager.deserializeSnapshotBufferFromDisk(2); @@ -1571,15 +1605,20 @@ TEST_P(CoordinationTest, TestStorageSnapshotSimple) EXPECT_EQ(restored_storage->session_and_timeout.size(), 2); } -TEST_P(CoordinationTest, TestStorageSnapshotMoreWrites) +TYPED_TEST(CoordinationTest, TestStorageSnapshotMoreWrites) { - auto params = GetParam(); + ChangelogDirTest test("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); - DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); + using Storage = typename TestFixture::Storage; - DB::KeeperStorage storage(500, "", keeper_context); + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + DB::KeeperSnapshotManager manager(3, this->keeper_context, this->enable_compression); + + Storage storage(500, "", this->keeper_context); storage.getSessionID(130); for (size_t i = 0; i < 50; ++i) @@ -1587,7 +1626,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotMoreWrites) addNode(storage, "/hello_" + std::to_string(i), "world_" + std::to_string(i)); } - DB::KeeperStorageSnapshot snapshot(&storage, 50); + 
DB::KeeperStorageSnapshot snapshot(&storage, 50); EXPECT_EQ(snapshot.snapshot_meta->get_last_log_idx(), 50); EXPECT_EQ(snapshot.snapshot_container_size, 54); @@ -1600,7 +1639,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotMoreWrites) auto buf = manager.serializeSnapshotToBuffer(snapshot); manager.serializeSnapshotBufferToDisk(*buf, 50); - EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin" + this->extension)); auto debuf = manager.deserializeSnapshotBufferFromDisk(50); @@ -1614,15 +1653,20 @@ TEST_P(CoordinationTest, TestStorageSnapshotMoreWrites) } -TEST_P(CoordinationTest, TestStorageSnapshotManySnapshots) +TYPED_TEST(CoordinationTest, TestStorageSnapshotManySnapshots) { - auto params = GetParam(); + ChangelogDirTest test("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); - DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); + using Storage = typename TestFixture::Storage; - DB::KeeperStorage storage(500, "", keeper_context); + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + DB::KeeperSnapshotManager manager(3, this->keeper_context, this->enable_compression); + + Storage storage(500, "", this->keeper_context); storage.getSessionID(130); for (size_t j = 1; j <= 5; ++j) @@ -1632,17 +1676,17 @@ TEST_P(CoordinationTest, TestStorageSnapshotManySnapshots) addNode(storage, "/hello_" + std::to_string(i), "world_" + std::to_string(i)); } - DB::KeeperStorageSnapshot snapshot(&storage, j * 50); + DB::KeeperStorageSnapshot snapshot(&storage, j * 50); auto buf = manager.serializeSnapshotToBuffer(snapshot); manager.serializeSnapshotBufferToDisk(*buf, j * 50); - EXPECT_TRUE(fs::exists(std::string{"./snapshots/snapshot_"} + std::to_string(j * 50) + ".bin" + params.extension)); + EXPECT_TRUE(fs::exists(std::string{"./snapshots/snapshot_"} + std::to_string(j * 50) + ".bin" + this->extension)); } - EXPECT_FALSE(fs::exists("./snapshots/snapshot_50.bin" + params.extension)); - EXPECT_FALSE(fs::exists("./snapshots/snapshot_100.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./snapshots/snapshot_150.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./snapshots/snapshot_200.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./snapshots/snapshot_250.bin" + params.extension)); + EXPECT_FALSE(fs::exists("./snapshots/snapshot_50.bin" + this->extension)); + EXPECT_FALSE(fs::exists("./snapshots/snapshot_100.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_150.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_200.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_250.bin" + this->extension)); auto [restored_storage, meta, _] = manager.restoreFromLatestSnapshot(); @@ -1655,21 +1699,26 @@ TEST_P(CoordinationTest, TestStorageSnapshotManySnapshots) } } -TEST_P(CoordinationTest, TestStorageSnapshotMode) +TYPED_TEST(CoordinationTest, TestStorageSnapshotMode) { - auto params = GetParam(); - ChangelogDirTest test("./snapshots"); - setSnapshotDirectory("./snapshots"); - DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); - DB::KeeperStorage storage(500, "", keeper_context); + ChangelogDirTest test("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + DB::KeeperSnapshotManager manager(3, 
this->keeper_context, this->enable_compression); + Storage storage(500, "", this->keeper_context); for (size_t i = 0; i < 50; ++i) { addNode(storage, "/hello_" + std::to_string(i), "world_" + std::to_string(i)); } { - DB::KeeperStorageSnapshot snapshot(&storage, 50); + DB::KeeperStorageSnapshot snapshot(&storage, 50); for (size_t i = 0; i < 50; ++i) { addNode(storage, "/hello_" + std::to_string(i), "wlrd_" + std::to_string(i)); @@ -1684,12 +1733,15 @@ TEST_P(CoordinationTest, TestStorageSnapshotMode) storage.container.erase("/hello_" + std::to_string(i)); } EXPECT_EQ(storage.container.size(), 29); - EXPECT_EQ(storage.container.snapshotSizeWithVersion().first, 105); + if constexpr (Storage::use_rocksdb) + EXPECT_EQ(storage.container.snapshotSizeWithVersion().first, 54); + else + EXPECT_EQ(storage.container.snapshotSizeWithVersion().first, 105); EXPECT_EQ(storage.container.snapshotSizeWithVersion().second, 1); auto buf = manager.serializeSnapshotToBuffer(snapshot); manager.serializeSnapshotBufferToDisk(*buf, 50); } - EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin" + this->extension)); EXPECT_EQ(storage.container.size(), 29); storage.clearGarbageAfterSnapshot(); EXPECT_EQ(storage.container.snapshotSizeWithVersion().first, 29); @@ -1709,28 +1761,33 @@ TEST_P(CoordinationTest, TestStorageSnapshotMode) } } -TEST_P(CoordinationTest, TestStorageSnapshotBroken) +TYPED_TEST(CoordinationTest, TestStorageSnapshotBroken) { - auto params = GetParam(); - ChangelogDirTest test("./snapshots"); - setSnapshotDirectory("./snapshots"); - DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); - DB::KeeperStorage storage(500, "", keeper_context); + ChangelogDirTest test("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + DB::KeeperSnapshotManager manager(3, this->keeper_context, this->enable_compression); + Storage storage(500, "", this->keeper_context); for (size_t i = 0; i < 50; ++i) { addNode(storage, "/hello_" + std::to_string(i), "world_" + std::to_string(i)); } { - DB::KeeperStorageSnapshot snapshot(&storage, 50); + DB::KeeperStorageSnapshot snapshot(&storage, 50); auto buf = manager.serializeSnapshotToBuffer(snapshot); manager.serializeSnapshotBufferToDisk(*buf, 50); } - EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_50.bin" + this->extension)); /// Let's corrupt file DB::WriteBufferFromFile plain_buf( - "./snapshots/snapshot_50.bin" + params.extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); + "./snapshots/snapshot_50.bin" + this->extension, DB::DBMS_DEFAULT_BUFFER_SIZE, O_APPEND | O_CREAT | O_WRONLY); plain_buf.truncate(34); plain_buf.sync(); @@ -1746,7 +1803,7 @@ nuraft::ptr getBufferFromZKRequest(int64_t session_id, int64_t z auto time = duration_cast(system_clock::now().time_since_epoch()).count(); DB::writeIntBinary(time, buf); DB::writeIntBinary(zxid, buf); - DB::writeIntBinary(DB::KeeperStorage::DigestVersion::NO_DIGEST, buf); + DB::writeIntBinary(DB::KeeperMemoryStorage::DigestVersion::NO_DIGEST, buf); return buf.getBuffer(); } @@ -1757,6 +1814,7 @@ getLogEntryFromZKRequest(size_t term, int64_t session_id, int64_t zxid, const Co return nuraft::cs_new(term, buffer); } +template void testLogAndStateMachine( DB::CoordinationSettingsPtr settings, uint64_t 
total_logs, @@ -1767,12 +1825,15 @@ void testLogAndStateMachine( ChangelogDirTest snapshots("./snapshots"); ChangelogDirTest logs("./logs"); + ChangelogDirTest rocks("./rocksdb"); auto get_keeper_context = [&] { auto local_keeper_context = std::make_shared(true, settings); local_keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots")); local_keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs")); + local_keeper_context->setRocksDBDisk(std::make_shared("RocksDisk", "./rocksdb")); + local_keeper_context->setRocksDBOptions(); return local_keeper_context; }; @@ -1780,7 +1841,7 @@ void testLogAndStateMachine( SnapshotsQueue snapshots_queue{1}; auto keeper_context = get_keeper_context(); - auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); + auto state_machine = std::make_shared>(queue, snapshots_queue, keeper_context, nullptr); state_machine->init(); DB::KeeperLogStore changelog( @@ -1827,7 +1888,7 @@ void testLogAndStateMachine( SnapshotsQueue snapshots_queue1{1}; keeper_context = get_keeper_context(); - auto restore_machine = std::make_shared(queue, snapshots_queue1, keeper_context, nullptr); + auto restore_machine = std::make_shared>(queue, snapshots_queue1, keeper_context, nullptr); restore_machine->init(); EXPECT_EQ(restore_machine->last_commit_index(), total_logs - total_logs % settings->snapshot_distance); @@ -1863,11 +1924,12 @@ void testLogAndStateMachine( } } -TEST_P(CoordinationTest, TestStateMachineAndLogStore) +TYPED_TEST(CoordinationTest, TestStateMachineAndLogStore) { using namespace Coordination; using namespace DB; - auto params = GetParam(); + + using Storage = typename TestFixture::Storage; { CoordinationSettingsPtr settings = std::make_shared(); @@ -1875,78 +1937,83 @@ TEST_P(CoordinationTest, TestStateMachineAndLogStore) settings->reserved_log_items = 10; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 37, params.enable_compression); + testLogAndStateMachine(settings, 37, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 10; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 11, params.enable_compression); + testLogAndStateMachine(settings, 11, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 10; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 40, params.enable_compression); + testLogAndStateMachine(settings, 40, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 20; settings->rotate_log_storage_interval = 30; - testLogAndStateMachine(settings, 40, params.enable_compression); + testLogAndStateMachine(settings, 40, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 0; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 40, params.enable_compression); + testLogAndStateMachine(settings, 40, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 1; settings->reserved_log_items = 1; settings->rotate_log_storage_interval = 32; - testLogAndStateMachine(settings, 32, params.enable_compression); + testLogAndStateMachine(settings, 32, 
this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 7; settings->rotate_log_storage_interval = 1; - testLogAndStateMachine(settings, 33, params.enable_compression); + testLogAndStateMachine(settings, 33, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 37; settings->reserved_log_items = 1000; settings->rotate_log_storage_interval = 5000; - testLogAndStateMachine(settings, 33, params.enable_compression); + testLogAndStateMachine(settings, 33, this->enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 37; settings->reserved_log_items = 1000; settings->rotate_log_storage_interval = 5000; - testLogAndStateMachine(settings, 45, params.enable_compression); + testLogAndStateMachine(settings, 45, this->enable_compression); } } -TEST_P(CoordinationTest, TestEphemeralNodeRemove) +TYPED_TEST(CoordinationTest, TestEphemeralNodeRemove) { using namespace Coordination; using namespace DB; ChangelogDirTest snapshots("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); + auto state_machine = std::make_shared>(queue, snapshots_queue, this->keeper_context, nullptr); state_machine->init(); std::shared_ptr request_c = std::make_shared(); @@ -1969,21 +2036,27 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) } -TEST_P(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitted) +TYPED_TEST(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitted) { using namespace Coordination; using namespace DB; ChangelogDirTest snapshots("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); + auto state_machine = std::make_shared>(queue, snapshots_queue, this->keeper_context, nullptr); state_machine->init(); String user_auth_data = "test_user:test_password"; - String digest = KeeperStorage::generateDigest(user_auth_data); + String digest = KeeperMemoryStorage::generateDigest(user_auth_data); std::shared_ptr auth_req = std::make_shared(); auth_req->scheme = "digest"; @@ -2019,20 +2092,25 @@ TEST_P(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitte EXPECT_EQ(acls[0].permissions, 31); } -TEST_P(CoordinationTest, TestPreprocessWhenCloseSessionIsPrecommitted) +TYPED_TEST(CoordinationTest, TestPreprocessWhenCloseSessionIsPrecommitted) { using namespace Coordination; using namespace DB; ChangelogDirTest snapshots("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; int64_t session_without_auth = 
1; int64_t session_with_auth = 2; size_t term = 0; - auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); + auto state_machine = std::make_shared>(queue, snapshots_queue, this->keeper_context, nullptr); state_machine->init(); auto & storage = state_machine->getStorageUnsafe(); @@ -2199,22 +2277,26 @@ TEST_P(CoordinationTest, TestPreprocessWhenCloseSessionIsPrecommitted) } } -TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted) +TYPED_TEST(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted) { using namespace Coordination; using namespace DB; ChangelogDirTest snapshots("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); + using Storage = typename TestFixture::Storage; + auto state_machine = std::make_shared>(queue, snapshots_queue, this->keeper_context, nullptr); state_machine->init(); String user_auth_data = "test_user:test_password"; - String digest = KeeperStorage::generateDigest(user_auth_data); + String digest = KeeperMemoryStorage::generateDigest(user_auth_data); std::shared_ptr auth_req = std::make_shared(); auth_req->scheme = "digest"; @@ -2257,17 +2339,17 @@ TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted) } -TEST_P(CoordinationTest, TestRotateIntervalChanges) +TYPED_TEST(CoordinationTest, TestRotateIntervalChanges) { using namespace Coordination; - auto params = GetParam(); + ChangelogDirTest snapshots("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); { DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(0, 3); for (size_t i = 1; i < 55; ++i) @@ -2283,12 +2365,12 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) } - EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin" + this->extension)); DB::KeeperLogStore changelog_1( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 10}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 10}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_1.init(0, 50); for (size_t i = 0; i < 55; ++i) { @@ -2301,13 +2383,13 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) waitDurableLogs(changelog_1); - EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin" + this->extension)); DB::KeeperLogStore changelog_2( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 7}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 7}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_2.init(98, 55); 
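/* Editor's note - illustrative sketch, not part of the patch: the hunks in this file
   mechanically migrate the tests from the value-parameterized TEST_P fixture (which read
   the compression flag through GetParam()) to a typed TYPED_TEST fixture, which is why
   params.enable_compression / params.extension become this->enable_compression /
   this->extension throughout. A minimal, self-contained GoogleTest sketch of that pattern
   follows; the parameter types and member names are assumptions for illustration only and
   are not the actual ClickHouse fixture.

   #include <gtest/gtest.h>
   #include <string>

   struct CompressedParam   { static constexpr bool enable_compression = true; };
   struct UncompressedParam { static constexpr bool enable_compression = false; };

   template <typename TestParam>
   class ChangelogTestSketch : public ::testing::Test
   {
   protected:
       static constexpr bool enable_compression = TestParam::enable_compression;
       std::string extension = enable_compression ? ".zstd" : "";
   };

   using SketchParams = ::testing::Types<CompressedParam, UncompressedParam>;
   TYPED_TEST_SUITE(ChangelogTestSketch, SketchParams);

   TYPED_TEST(ChangelogTestSketch, UsesFixtureMembers)
   {
       // Members of a typed fixture live in a dependent base, hence the this-> access.
       EXPECT_EQ(this->extension.empty(), !this->enable_compression);
   }
*/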
for (size_t i = 0; i < 17; ++i) @@ -2321,20 +2403,20 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) waitDurableLogs(changelog_2); - keeper_context->setLastCommitIndex(105); + this->keeper_context->setLastCommitIndex(105); changelog_2.compact(105); std::this_thread::sleep_for(std::chrono::microseconds(1000)); - assertFileDeleted("./logs/changelog_1_100.bin" + params.extension); - EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_111_117.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_118_124.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin" + params.extension)); + assertFileDeleted("./logs/changelog_1_100.bin" + this->extension); + EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_111_117.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_118_124.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin" + this->extension)); DB::KeeperLogStore changelog_3( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 5}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 5}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog_3.init(116, 3); for (size_t i = 0; i < 17; ++i) { @@ -2347,20 +2429,20 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) waitDurableLogs(changelog_3); - keeper_context->setLastCommitIndex(125); + this->keeper_context->setLastCommitIndex(125); changelog_3.compact(125); std::this_thread::sleep_for(std::chrono::microseconds(1000)); - assertFileDeleted("./logs/changelog_101_110.bin" + params.extension); - assertFileDeleted("./logs/changelog_111_117.bin" + params.extension); - assertFileDeleted("./logs/changelog_118_124.bin" + params.extension); + assertFileDeleted("./logs/changelog_101_110.bin" + this->extension); + assertFileDeleted("./logs/changelog_111_117.bin" + this->extension); + assertFileDeleted("./logs/changelog_118_124.bin" + this->extension); - EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_132_136.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_137_141.bin" + params.extension)); - EXPECT_TRUE(fs::exists("./logs/changelog_142_146.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_125_131.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_132_136.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_137_141.bin" + this->extension)); + EXPECT_TRUE(fs::exists("./logs/changelog_142_146.bin" + this->extension)); } -TEST_P(CoordinationTest, TestSessionExpiryQueue) +TYPED_TEST(CoordinationTest, TestSessionExpiryQueue) { using namespace Coordination; SessionExpiryQueue queue(500); @@ -2378,16 +2460,15 @@ TEST_P(CoordinationTest, TestSessionExpiryQueue) } -TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) +TYPED_TEST(CoordinationTest, TestCompressedLogsMultipleRewrite) { using namespace Coordination; - auto test_params = GetParam(); ChangelogDirTest logs("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, 
.rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(0, 3); for (size_t i = 1; i < 55; ++i) @@ -2402,9 +2483,9 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) waitDurableLogs(changelog); DB::KeeperLogStore changelog1( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog1.init(0, 3); for (size_t i = 55; i < 70; ++i) { @@ -2418,9 +2499,9 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) waitDurableLogs(changelog1); DB::KeeperLogStore changelog2( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog2.init(0, 3); for (size_t i = 70; i < 80; ++i) { @@ -2432,16 +2513,21 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) } } -TEST_P(CoordinationTest, TestStorageSnapshotDifferentCompressions) +TYPED_TEST(CoordinationTest, TestStorageSnapshotDifferentCompressions) { - auto params = GetParam(); + ChangelogDirTest test("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); - DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); + using Storage = typename TestFixture::Storage; - DB::KeeperStorage storage(500, "", keeper_context); + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + DB::KeeperSnapshotManager manager(3, this->keeper_context, this->enable_compression); + + Storage storage(500, "", this->keeper_context); addNode(storage, "/hello1", "world", 1); addNode(storage, "/hello2", "somedata", 3); storage.session_id_counter = 5; @@ -2451,13 +2537,13 @@ TEST_P(CoordinationTest, TestStorageSnapshotDifferentCompressions) storage.getSessionID(130); storage.getSessionID(130); - DB::KeeperStorageSnapshot snapshot(&storage, 2); + DB::KeeperStorageSnapshot snapshot(&storage, 2); auto buf = manager.serializeSnapshotToBuffer(snapshot); manager.serializeSnapshotBufferToDisk(*buf, 2); - EXPECT_TRUE(fs::exists("./snapshots/snapshot_2.bin" + params.extension)); + EXPECT_TRUE(fs::exists("./snapshots/snapshot_2.bin" + this->extension)); - DB::KeeperSnapshotManager new_manager(3, keeper_context, !params.enable_compression); + DB::KeeperSnapshotManager new_manager(3, this->keeper_context, !this->enable_compression); auto debuf = new_manager.deserializeSnapshotBufferFromDisk(2); @@ -2479,17 +2565,17 @@ TEST_P(CoordinationTest, TestStorageSnapshotDifferentCompressions) EXPECT_EQ(restored_storage->session_and_timeout.size(), 2); } -TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) +TYPED_TEST(CoordinationTest, ChangelogInsertThreeTimesSmooth) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); { - LOG_INFO(log, "================First time====================="); + LOG_INFO(this->log, "================First time====================="); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, 
.rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -2499,11 +2585,11 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) } { - LOG_INFO(log, "================Second time====================="); + LOG_INFO(this->log, "================Second time====================="); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -2513,11 +2599,11 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) } { - LOG_INFO(log, "================Third time====================="); + LOG_INFO(this->log, "================Third time====================="); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -2527,11 +2613,11 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) } { - LOG_INFO(log, "================Fourth time====================="); + LOG_INFO(this->log, "================Fourth time====================="); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); @@ -2542,18 +2628,18 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) } -TEST_P(CoordinationTest, ChangelogInsertMultipleTimesSmooth) +TYPED_TEST(CoordinationTest, ChangelogInsertMultipleTimesSmooth) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); for (size_t i = 0; i < 36; ++i) { - LOG_INFO(log, "================First time====================="); + LOG_INFO(this->log, "================First time====================="); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (size_t j = 0; j < 7; ++j) { @@ -2565,24 +2651,24 @@ TEST_P(CoordinationTest, ChangelogInsertMultipleTimesSmooth) } DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); EXPECT_EQ(changelog.next_slot(), 36 * 7 + 1); } -TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) +TYPED_TEST(CoordinationTest, 
ChangelogInsertThreeTimesHard) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); { - LOG_INFO(log, "================First time====================="); + LOG_INFO(this->log, "================First time====================="); DB::KeeperLogStore changelog1( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog1.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog1.append(entry); @@ -2592,11 +2678,11 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) } { - LOG_INFO(log, "================Second time====================="); + LOG_INFO(this->log, "================Second time====================="); DB::KeeperLogStore changelog2( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog2.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog2.append(entry); @@ -2606,11 +2692,11 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) } { - LOG_INFO(log, "================Third time====================="); + LOG_INFO(this->log, "================Third time====================="); DB::KeeperLogStore changelog3( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog3.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog3.append(entry); @@ -2620,11 +2706,11 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) } { - LOG_INFO(log, "================Fourth time====================="); + LOG_INFO(this->log, "================Fourth time====================="); DB::KeeperLogStore changelog4( - DB::LogFileSettings{.force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog4.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog4.append(entry); @@ -2634,18 +2720,23 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) } } -TEST_P(CoordinationTest, TestStorageSnapshotEqual) +TYPED_TEST(CoordinationTest, TestStorageSnapshotEqual) { - auto params = GetParam(); + ChangelogDirTest test("./snapshots"); - setSnapshotDirectory("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); std::optional snapshot_hash; for (size_t i = 0; i < 15; ++i) { - DB::KeeperSnapshotManager manager(3, keeper_context, params.enable_compression); + DB::KeeperSnapshotManager manager(3, this->keeper_context, this->enable_compression); - DB::KeeperStorage storage(500, "", keeper_context); + Storage storage(500, "", this->keeper_context); addNode(storage, "/hello", ""); for (size_t j = 0; j < 5000; ++j) { @@ -2661,7 +2752,7 @@ 
TEST_P(CoordinationTest, TestStorageSnapshotEqual) for (size_t j = 0; j < 3333; ++j) storage.getSessionID(130 * j); - DB::KeeperStorageSnapshot snapshot(&storage, storage.zxid); + DB::KeeperStorageSnapshot snapshot(&storage, storage.zxid); auto buf = manager.serializeSnapshotToBuffer(snapshot); @@ -2678,17 +2769,16 @@ TEST_P(CoordinationTest, TestStorageSnapshotEqual) } -TEST_P(CoordinationTest, TestLogGap) +TYPED_TEST(CoordinationTest, TestLogGap) { using namespace Coordination; - auto test_params = GetParam(); ChangelogDirTest logs("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); DB::KeeperLogStore changelog( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(0, 3); for (size_t i = 1; i < 55; ++i) @@ -2701,13 +2791,13 @@ TEST_P(CoordinationTest, TestLogGap) } DB::KeeperLogStore changelog1( - DB::LogFileSettings{.force_sync = true, .compress_logs = test_params.enable_compression, .rotate_interval = 100}, + DB::LogFileSettings{.force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog1.init(61, 3); /// Logs discarded - EXPECT_FALSE(fs::exists("./logs/changelog_1_100.bin" + test_params.extension)); + EXPECT_FALSE(fs::exists("./logs/changelog_1_100.bin" + this->extension)); EXPECT_EQ(changelog1.start_index(), 61); EXPECT_EQ(changelog1.next_slot(), 61); } @@ -2719,12 +2809,17 @@ ResponseType getSingleResponse(const auto & responses) return dynamic_cast(*responses[0].response); } -TEST_P(CoordinationTest, TestUncommittedStateBasicCrud) +TYPED_TEST(CoordinationTest, TestUncommittedStateBasicCrud) { using namespace DB; using namespace Coordination; - DB::KeeperStorage storage{500, "", keeper_context}; + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + Storage storage{500, "", this->keeper_context}; constexpr std::string_view path = "/test"; @@ -2836,12 +2931,17 @@ TEST_P(CoordinationTest, TestUncommittedStateBasicCrud) ASSERT_FALSE(get_committed_data()); } -TEST_P(CoordinationTest, TestListRequestTypes) +TYPED_TEST(CoordinationTest, TestListRequestTypes) { using namespace DB; using namespace Coordination; - KeeperStorage storage{500, "", keeper_context}; + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + Storage storage{500, "", this->keeper_context}; int32_t zxid = 0; @@ -2918,18 +3018,18 @@ TEST_P(CoordinationTest, TestListRequestTypes) } } -TEST_P(CoordinationTest, TestDurableState) +TYPED_TEST(CoordinationTest, TestDurableState) { ChangelogDirTest logs("./logs"); - setLogDirectory("./logs"); - setStateFileDirectory("."); + this->setLogDirectory("./logs"); + this->setStateFileDirectory("."); auto state = nuraft::cs_new(); std::optional state_manager; const auto reload_state_manager = [&] { - state_manager.emplace(1, "localhost", 9181, keeper_context); + state_manager.emplace(1, "localhost", 9181, this->keeper_context); state_manager->loadLogStore(1, 0); }; @@ -2992,10 +3092,15 @@ TEST_P(CoordinationTest, TestDurableState) } } -TEST_P(CoordinationTest, TestFeatureFlags) +TYPED_TEST(CoordinationTest, TestFeatureFlags) { using namespace Coordination; 
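/* Editor's note - illustrative sketch, not part of the patch: the hunk below (like the
   earlier addNode() and testLogAndStateMachine() changes) obtains the concrete storage
   class from the typed fixture via "using Storage = typename TestFixture::Storage;", so a
   single test body and a single set of templated helpers cover both the in-memory and the
   RocksDB-backed storage, with "if constexpr (Storage::use_rocksdb)" used where the two
   backends legitimately differ (see the snapshotSizeWithVersion hunk above). The
   self-contained sketch below shows that shape; MemoryStorageSketch, RocksStorageSketch
   and use_rocksdb here are illustrative stand-ins, not the real ClickHouse classes.

   #include <gtest/gtest.h>
   #include <string>

   struct MemoryStorageSketch { static constexpr bool use_rocksdb = false; };
   struct RocksStorageSketch  { static constexpr bool use_rocksdb = true; };

   template <typename StorageT>
   class StorageTestSketch : public ::testing::Test
   {
   protected:
       using Storage = StorageT;   // reached in tests as typename TestFixture::Storage
   };

   using SketchStorages = ::testing::Types<MemoryStorageSketch, RocksStorageSketch>;
   TYPED_TEST_SUITE(StorageTestSketch, SketchStorages);

   // A free helper templated over the storage type, mirroring the templated addNode().
   template <typename Storage>
   std::string backendName(const Storage &)
   {
       if constexpr (Storage::use_rocksdb)
           return "rocksdb";
       else
           return "memory";
   }

   TYPED_TEST(StorageTestSketch, HelperWorksForBothBackends)
   {
       using Storage = typename TestFixture::Storage;
       Storage storage;
       EXPECT_FALSE(backendName(storage).empty());
   }
*/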
- KeeperStorage storage{500, "", keeper_context}; + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + Storage storage{500, "", this->keeper_context}; auto request = std::make_shared(); request->path = DB::keeper_api_feature_flags_path; auto responses = storage.processRequest(request, 0, std::nullopt, true, true); @@ -3007,14 +3112,19 @@ TEST_P(CoordinationTest, TestFeatureFlags) ASSERT_FALSE(feature_flags.isEnabled(KeeperFeatureFlag::CHECK_NOT_EXISTS)); } -TEST_P(CoordinationTest, TestSystemNodeModify) +TYPED_TEST(CoordinationTest, TestSystemNodeModify) { using namespace Coordination; int64_t zxid{0}; + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + // On INIT we abort when a system path is modified - keeper_context->setServerState(KeeperContext::Phase::RUNNING); - KeeperStorage storage{500, "", keeper_context}; + this->keeper_context->setServerState(KeeperContext::Phase::RUNNING); + Storage storage{500, "", this->keeper_context}; const auto assert_create = [&](const std::string_view path, const auto expected_code) { auto request = std::make_shared(); @@ -3039,11 +3149,11 @@ TEST_P(CoordinationTest, TestSystemNodeModify) assert_create("/keeper1/test", Error::ZOK); } -TEST_P(CoordinationTest, ChangelogTestMaxLogSize) +TYPED_TEST(CoordinationTest, ChangelogTestMaxLogSize) { - auto params = GetParam(); + ChangelogDirTest test("./logs"); - setLogDirectory("./logs"); + this->setLogDirectory("./logs"); uint64_t last_entry_index{0}; size_t i{0}; @@ -3051,9 +3161,9 @@ TEST_P(CoordinationTest, ChangelogTestMaxLogSize) SCOPED_TRACE("Small rotation interval, big size limit"); DB::KeeperLogStore changelog( DB::LogFileSettings{ - .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 20, .max_size = 50 * 1024 * 1024}, + .force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 20, .max_size = 50 * 1024 * 1024}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); for (; i < 100; ++i) @@ -3071,9 +3181,9 @@ TEST_P(CoordinationTest, ChangelogTestMaxLogSize) SCOPED_TRACE("Large rotation interval, small size limit"); DB::KeeperLogStore changelog( DB::LogFileSettings{ - .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100'000, .max_size = 4000}, + .force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100'000, .max_size = 4000}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); ASSERT_EQ(changelog.entry_at(last_entry_index)->get_term(), (i - 1 + 44) * 10); @@ -3093,20 +3203,25 @@ TEST_P(CoordinationTest, ChangelogTestMaxLogSize) SCOPED_TRACE("Final verify all logs"); DB::KeeperLogStore changelog( DB::LogFileSettings{ - .force_sync = true, .compress_logs = params.enable_compression, .rotate_interval = 100'000, .max_size = 4000}, + .force_sync = true, .compress_logs = this->enable_compression, .rotate_interval = 100'000, .max_size = 4000}, DB::FlushSettings(), - keeper_context); + this->keeper_context); changelog.init(1, 0); ASSERT_EQ(changelog.entry_at(last_entry_index)->get_term(), (i - 1 + 44) * 10); } } -TEST_P(CoordinationTest, TestCheckNotExistsRequest) +TYPED_TEST(CoordinationTest, TestCheckNotExistsRequest) { using namespace DB; using namespace Coordination; - KeeperStorage storage{500, "", keeper_context}; + using Storage = typename TestFixture::Storage; 
+ + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + + Storage storage{500, "", this->keeper_context}; int32_t zxid = 0; @@ -3174,18 +3289,23 @@ TEST_P(CoordinationTest, TestCheckNotExistsRequest) } } -TEST_P(CoordinationTest, TestReapplyingDeltas) +TYPED_TEST(CoordinationTest, TestReapplyingDeltas) { using namespace DB; using namespace Coordination; + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + static constexpr int64_t initial_zxid = 100; const auto create_request = std::make_shared(); create_request->path = "/test/data"; create_request->is_sequential = true; - const auto process_create = [](KeeperStorage & storage, const auto & request, int64_t zxid) + const auto process_create = [](Storage & storage, const auto & request, int64_t zxid) { storage.preprocessRequest(request, 1, 0, zxid); auto responses = storage.processRequest(request, 1, zxid); @@ -3206,19 +3326,19 @@ TEST_P(CoordinationTest, TestReapplyingDeltas) process_create(storage, create_request, zxid); }; - KeeperStorage storage1{500, "", keeper_context}; + Storage storage1{500, "", this->keeper_context}; commit_initial_data(storage1); for (int64_t zxid = initial_zxid + 1; zxid < initial_zxid + 50; ++zxid) storage1.preprocessRequest(create_request, 1, 0, zxid, /*check_acl=*/true, /*digest=*/std::nullopt, /*log_idx=*/zxid); /// create identical new storage - KeeperStorage storage2{500, "", keeper_context}; + Storage storage2{500, "", this->keeper_context}; commit_initial_data(storage2); storage1.applyUncommittedState(storage2, initial_zxid); - const auto commit_unprocessed = [&](KeeperStorage & storage) + const auto commit_unprocessed = [&](Storage & storage) { for (int64_t zxid = initial_zxid + 1; zxid < initial_zxid + 50; ++zxid) { @@ -3231,7 +3351,7 @@ TEST_P(CoordinationTest, TestReapplyingDeltas) commit_unprocessed(storage1); commit_unprocessed(storage2); - const auto get_children = [&](KeeperStorage & storage) + const auto get_children = [&](Storage & storage) { const auto list_request = std::make_shared(); list_request->path = "/test"; @@ -3251,8 +3371,8 @@ TEST_P(CoordinationTest, TestReapplyingDeltas) ASSERT_TRUE(children1_set == children2_set); } -INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite, - CoordinationTest, - ::testing::ValuesIn(std::initializer_list{CompressionParam{true, ".zstd"}, CompressionParam{false, ""}})); +/// INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite, +/// CoordinationTest, +/// ::testing::ValuesIn(std::initializer_list{CompressionParam{true, ".zstd"}, CompressionParam{false, ""}})); #endif diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index bc72c996384..af681cd5639 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/src/Core/LoadBalancing.h b/src/Core/LoadBalancing.h new file mode 100644 index 00000000000..23758895ce2 --- /dev/null +++ b/src/Core/LoadBalancing.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ + +enum class LoadBalancing : uint8_t +{ + /// among replicas with a minimum number of errors selected randomly + RANDOM = 0, + /// a replica is selected among the replicas with the minimum number of errors + /// with the minimum number of distinguished characters in the replica name prefix and local hostname prefix + NEAREST_HOSTNAME, + /// just like NEAREST_HOSTNAME, but it count distinguished characters in a levenshtein distance 
manner + HOSTNAME_LEVENSHTEIN_DISTANCE, + // replicas with the same number of errors are accessed in the same order + // as they are specified in the configuration. + IN_ORDER, + /// if first replica one has higher number of errors, + /// pick a random one from replicas with minimum number of errors + FIRST_OR_RANDOM, + // round robin across replicas with the same number of errors. + ROUND_ROBIN, +}; + +} diff --git a/src/Core/QueryLogElementType.h b/src/Core/QueryLogElementType.h new file mode 100644 index 00000000000..99361f945cc --- /dev/null +++ b/src/Core/QueryLogElementType.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +namespace DB +{ + +// Make it signed for compatibility with DataTypeEnum8 +enum QueryLogElementType : int8_t +{ + QUERY_START = 1, + QUERY_FINISH = 2, + EXCEPTION_BEFORE_START = 3, + EXCEPTION_WHILE_PROCESSING = 4, +}; + +} diff --git a/src/Core/SchemaInferenceMode.h b/src/Core/SchemaInferenceMode.h new file mode 100644 index 00000000000..17d1d62a60e --- /dev/null +++ b/src/Core/SchemaInferenceMode.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace DB +{ + +enum class SchemaInferenceMode : uint8_t +{ + DEFAULT, + UNION, +}; + +} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 52fa28a4481..3d181e33001 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -36,7 +36,7 @@ class IColumn; M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ - M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ + M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size in rows for reading", 0) \ M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \ M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \ M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \ @@ -151,7 +151,7 @@ class IColumn; M(UInt64, max_local_write_bandwidth, 0, "The maximum speed of local writes in bytes per second.", 0) \ M(Bool, stream_like_engine_allow_direct_select, false, "Allow direct SELECT query for Kafka, RabbitMQ, FileLog, Redis Streams, and NATS engines. In case there are attached materialized views, SELECT query is not allowed even if this setting is enabled.", 0) \ M(String, stream_like_engine_insert_queue, "", "When stream like engine reads from multiple queues, user will need to select one queue to insert into when writing. Used by Redis Streams and NATS.", 0) \ - \ + M(Bool, dictionary_validate_primary_key_type, false, "Validate primary key type for dictionaries. 
By default id type for simple layouts will be implicitly converted to UInt64.", 0) \ M(Bool, distributed_insert_skip_read_only_replicas, false, "If true, INSERT into Distributed will skip read-only replicas.", 0) \ M(Bool, distributed_foreground_insert, false, "If setting is enabled, insert query into distributed waits until data are sent to all nodes in a cluster. \n\nEnables or disables synchronous data insertion into a `Distributed` table.\n\nBy default, when inserting data into a Distributed table, the ClickHouse server sends data to cluster nodes in the background. When `distributed_foreground_insert` = 1, the data is processed synchronously, and the `INSERT` operation succeeds only after all the data is saved on all shards (at least one replica for each shard if `internal_replication` is true).", 0) ALIAS(insert_distributed_sync) \ M(UInt64, distributed_background_insert_timeout, 0, "Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. Zero value means no timeout.", 0) ALIAS(insert_distributed_timeout) \ @@ -502,6 +502,7 @@ class IColumn; M(UInt64, backup_restore_keeper_value_max_size, 1048576, "Maximum size of data of a [Zoo]Keeper's node during backup", 0) \ M(UInt64, backup_restore_batch_size_for_keeper_multiread, 10000, "Maximum size of batch for multiread request to [Zoo]Keeper during backup or restore", 0) \ M(UInt64, backup_restore_batch_size_for_keeper_multi, 1000, "Maximum size of batch for multi request to [Zoo]Keeper during backup or restore", 0) \ + M(UInt64, backup_restore_s3_retry_attempts, 1000, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore.", 0) \ M(UInt64, max_backup_bandwidth, 0, "The maximum read speed in bytes per second for particular backup on server. Zero means unlimited.", 0) \ \ M(Bool, log_profile_events, true, "Log query performance statistics into the query_log, query_thread_log and query_views_log.", 0) \ @@ -608,9 +609,8 @@ class IColumn; M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \ M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ - M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ + M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. 
It guarantees correctness, because these features can't work together.", 0) \ - M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ @@ -976,6 +976,7 @@ class IColumn; #define OBSOLETE_SETTINGS(M, ALIAS) \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ + MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 0) \ MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \ MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \ MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index b9b72209103..de4725dc350 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -67,6 +67,7 @@ static std::initializer_list +#include #include +#include +#include #include +#include #include #include #include @@ -115,25 +119,6 @@ constexpr auto getEnumValues(); return getEnumValues().size();\ } -enum class LoadBalancing : uint8_t -{ - /// among replicas with a minimum number of errors selected randomly - RANDOM = 0, - /// a replica is selected among the replicas with the minimum number of errors - /// with the minimum number of distinguished characters in the replica name prefix and local hostname prefix - NEAREST_HOSTNAME, - /// just like NEAREST_HOSTNAME, but it count distinguished characters in a levenshtein distance manner - HOSTNAME_LEVENSHTEIN_DISTANCE, - // replicas with the same number of errors are accessed in the same order - // as they are specified in the configuration. - IN_ORDER, - /// if first replica one has higher number of errors, - /// pick a random one from replicas with minimum number of errors - FIRST_OR_RANDOM, - // round robin across replicas with the same number of errors. - ROUND_ROBIN, -}; - DECLARE_SETTING_ENUM(LoadBalancing) DECLARE_SETTING_ENUM(JoinStrictness) @@ -204,16 +189,6 @@ DECLARE_SETTING_ENUM_WITH_RENAME(ParquetVersion, FormatSettings::ParquetVersion) DECLARE_SETTING_ENUM(LogsLevel) - -// Make it signed for compatibility with DataTypeEnum8 -enum QueryLogElementType : int8_t -{ - QUERY_START = 1, - QUERY_FINISH = 2, - EXCEPTION_BEFORE_START = 3, - EXCEPTION_WHILE_PROCESSING = 4, -}; - DECLARE_SETTING_ENUM_WITH_RENAME(LogQueriesType, QueryLogElementType) @@ -292,13 +267,6 @@ enum class StreamingHandleErrorMode : uint8_t DECLARE_SETTING_ENUM(StreamingHandleErrorMode) -enum class ShortCircuitFunctionEvaluation : uint8_t -{ - ENABLE, // Use short-circuit function evaluation for functions that are suitable for it. - FORCE_ENABLE, // Use short-circuit function evaluation for all functions. - DISABLE, // Disable short-circuit function evaluation. 
-}; - DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation) enum class TransactionsWaitCSNMode : uint8_t @@ -367,12 +335,6 @@ DECLARE_SETTING_ENUM(ObjectStorageQueueAction) DECLARE_SETTING_ENUM(ExternalCommandStderrReaction) -enum class SchemaInferenceMode : uint8_t -{ - DEFAULT, - UNION, -}; - DECLARE_SETTING_ENUM(SchemaInferenceMode) DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOverflowBehavior, FormatSettings::DateTimeOverflowBehavior) diff --git a/src/Core/ShortCircuitFunctionEvaluation.h b/src/Core/ShortCircuitFunctionEvaluation.h new file mode 100644 index 00000000000..3785efe8015 --- /dev/null +++ b/src/Core/ShortCircuitFunctionEvaluation.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace DB +{ + +enum class ShortCircuitFunctionEvaluation : uint8_t +{ + ENABLE, // Use short-circuit function evaluation for functions that are suitable for it. + FORCE_ENABLE, // Use short-circuit function evaluation for all functions. + DISABLE, // Disable short-circuit function evaluation. +}; + +} diff --git a/src/Core/SortDescription.h b/src/Core/SortDescription.h index 33fd6017599..5c6f3e3150a 100644 --- a/src/Core/SortDescription.h +++ b/src/Core/SortDescription.h @@ -1,15 +1,16 @@ #pragma once -#include -#include -#include -#include + #include -#include #include #include #include +#include +#include +#include +#include + namespace DB { diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 48f76769a09..d77878415db 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -19,7 +20,6 @@ #endif #include #include -#include #include #include @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -53,7 +54,6 @@ #include #include #include -#include #include #include #include @@ -78,8 +78,6 @@ namespace DB { namespace ErrorCodes { - extern const int CANNOT_SET_SIGNAL_HANDLER; - extern const int CANNOT_SEND_SIGNAL; extern const int SYSTEM_ERROR; extern const int LOGICAL_ERROR; } @@ -87,114 +85,6 @@ namespace DB using namespace DB; -PipeFDs signal_pipe; - - -/** Reset signal handler to the default and send signal to itself. - * It's called from user signal handler to write core dump. - */ -static void call_default_signal_handler(int sig) -{ - if (SIG_ERR == signal(sig, SIG_DFL)) - throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); - - if (0 != raise(sig)) - throw ErrnoException(ErrorCodes::CANNOT_SEND_SIGNAL, "Cannot send signal"); -} - -static const size_t signal_pipe_buf_size = - sizeof(int) - + sizeof(siginfo_t) - + sizeof(ucontext_t*) - + sizeof(StackTrace) - + sizeof(UInt64) - + sizeof(UInt32) - + sizeof(void*); - -using signal_function = void(int, siginfo_t*, void*); - -static void writeSignalIDtoSignalPipe(int sig) -{ - auto saved_errno = errno; /// We must restore previous value of errno in signal handler. - - char buf[signal_pipe_buf_size]; - WriteBufferFromFileDescriptor out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf); - writeBinary(sig, out); - out.next(); - - errno = saved_errno; -} - -/** Signal handler for HUP */ -static void closeLogsSignalHandler(int sig, siginfo_t *, void *) -{ - DENY_ALLOCATIONS_IN_SCOPE; - writeSignalIDtoSignalPipe(sig); -} - -static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *) -{ - DENY_ALLOCATIONS_IN_SCOPE; - writeSignalIDtoSignalPipe(sig); -} - - -static std::atomic_flag fatal_error_printed; - -/** Handler for "fault" or diagnostic signals. 
Send data about fault to separate thread to write into log. - */ -static void signalHandler(int sig, siginfo_t * info, void * context) -{ - if (asynchronous_stack_unwinding && sig == SIGSEGV) - siglongjmp(asynchronous_stack_unwinding_signal_jump_buffer, 1); - - DENY_ALLOCATIONS_IN_SCOPE; - auto saved_errno = errno; /// We must restore previous value of errno in signal handler. - - char buf[signal_pipe_buf_size]; - WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf); - - const ucontext_t * signal_context = reinterpret_cast(context); - const StackTrace stack_trace(*signal_context); - -#if USE_GWP_ASAN - if (const auto fault_address = reinterpret_cast(info->si_addr); - GWPAsan::isGWPAsanError(fault_address)) - GWPAsan::printReport(fault_address); -#endif - - writeBinary(sig, out); - writePODBinary(*info, out); - writePODBinary(signal_context, out); - writePODBinary(stack_trace, out); - writeVectorBinary(Exception::enable_job_stack_trace ? Exception::thread_frame_pointers : std::vector{}, out); - writeBinary(static_cast(getThreadId()), out); - writePODBinary(current_thread, out); - - out.next(); - - if (sig != SIGTSTP) /// This signal is used for debugging. - { - /// The time that is usually enough for separate thread to print info into log. - /// Under MSan full stack unwinding with DWARF info about inline functions takes 101 seconds in one case. - for (size_t i = 0; i < 300; ++i) - { - /// We will synchronize with the thread printing the messages with an atomic variable to finish earlier. - if (fatal_error_printed.test()) - break; - - /// This coarse method of synchronization is perfectly ok for fatal signals. - sleepForSeconds(1); - } - - /// Wait for all logs flush operations - sleepForSeconds(3); - call_default_signal_handler(sig); - } - - errno = saved_errno; -} - static bool getenvBool(const char * name) { @@ -206,445 +96,6 @@ static bool getenvBool(const char * name) } -/// Avoid link time dependency on DB/Interpreters - will use this function only when linked. -__attribute__((__weak__)) void collectCrashLog( - Int32 signal, UInt64 thread_id, const String & query_id, const StackTrace & stack_trace); - - -/** The thread that read info about signal or std::terminate from pipe. - * On HUP, close log files (for new files to be opened later). - * On information about std::terminate, write it to log. - * On other signals, write info to log. - */ -class SignalListener : public Poco::Runnable -{ -public: - static constexpr int StdTerminate = -1; - static constexpr int StopThread = -2; - static constexpr int SanitizerTrap = -3; - - explicit SignalListener(BaseDaemon & daemon_) - : log(getLogger("BaseDaemon")) - , daemon(daemon_) - { - } - - void run() override - { - static_assert(PIPE_BUF >= 512); - static_assert(signal_pipe_buf_size <= PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic and the minimal known PIPE_BUF across supported platforms is 512"); - char buf[signal_pipe_buf_size]; - ReadBufferFromFileDescriptor in(signal_pipe.fds_rw[0], signal_pipe_buf_size, buf); - - while (!in.eof()) - { - int sig = 0; - readBinary(sig, in); - // We may log some specific signals afterwards, with different log - // levels and more info, but for completeness we log all signals - // here at trace level. - // Don't use strsignal here, because it's not thread-safe. 
- LOG_TRACE(log, "Received signal {}", sig); - - if (sig == StopThread) - { - LOG_INFO(log, "Stop SignalListener thread"); - break; - } - else if (sig == SIGHUP) - { - LOG_DEBUG(log, "Received signal to close logs."); - BaseDaemon::instance().closeLogs(BaseDaemon::instance().logger()); - LOG_INFO(log, "Opened new log file after received signal."); - } - else if (sig == StdTerminate) - { - UInt32 thread_num; - std::string message; - - readBinary(thread_num, in); - readBinary(message, in); - - onTerminate(message, thread_num); - } - else if (sig == SIGINT || - sig == SIGQUIT || - sig == SIGTERM) - { - daemon.handleSignal(sig); - } - else - { - siginfo_t info{}; - ucontext_t * context{}; - StackTrace stack_trace(NoCapture{}); - std::vector thread_frame_pointers; - UInt32 thread_num{}; - ThreadStatus * thread_ptr{}; - - if (sig != SanitizerTrap) - { - readPODBinary(info, in); - readPODBinary(context, in); - } - - readPODBinary(stack_trace, in); - - if (sig != SanitizerTrap) - readVectorBinary(thread_frame_pointers, in); - - readBinary(thread_num, in); - readPODBinary(thread_ptr, in); - - /// This allows to receive more signals if failure happens inside onFault function. - /// Example: segfault while symbolizing stack trace. - try - { - std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_frame_pointers, thread_num, thread_ptr); }).detach(); - } - catch (...) - { - /// Likely cannot allocate thread - onFault(sig, info, context, stack_trace, thread_frame_pointers, thread_num, thread_ptr); - } - } - } - } - -private: - LoggerPtr log; - BaseDaemon & daemon; - - void onTerminate(std::string_view message, UInt32 thread_num) const - { - size_t pos = message.find('\n'); - - LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) {}", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, thread_num, message.substr(0, pos)); - - /// Print trace from std::terminate exception line-by-line to make it easy for grep. - while (pos != std::string_view::npos) - { - ++pos; - size_t next_pos = message.find('\n', pos); - size_t size = next_pos; - if (next_pos != std::string_view::npos) - size = next_pos - pos; - - LOG_FATAL(log, fmt::runtime(message.substr(pos, size))); - pos = next_pos; - } - } - - void onFault( - int sig, - const siginfo_t & info, - ucontext_t * context, - const StackTrace & stack_trace, - const std::vector & thread_frame_pointers, - UInt32 thread_num, - ThreadStatus * thread_ptr) const - try - { - ThreadStatus thread_status; - - /// First log those fields that are safe to access and that should not cause new fault. - /// That way we will have some duplicated info in the log but we don't loose important info - /// in case of double fault. - - LOG_FATAL(log, "########## Short fault info ############"); - LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, - thread_num, sig); - - std::string signal_description = "Unknown signal"; - - /// Some of these are not really signals, but our own indications on failure reason. 
- if (sig == StdTerminate) - signal_description = "std::terminate"; - else if (sig == SanitizerTrap) - signal_description = "sanitizer trap"; - else if (sig >= 0) - signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context - - LOG_FATAL(log, "Signal description: {}", signal_description); - - String error_message; - - if (sig != SanitizerTrap) - error_message = signalToErrorMessage(sig, info, *context); - else - error_message = "Sanitizer trap."; - - LOG_FATAL(log, fmt::runtime(error_message)); - - String bare_stacktrace_str; - if (stack_trace.getSize()) - { - /// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace. - /// NOTE: This still require memory allocations and mutex lock inside logger. - /// BTW we can also print it to stderr using write syscalls. - - WriteBufferFromOwnString bare_stacktrace; - writeString("Stack trace:", bare_stacktrace); - for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i) - { - writeChar(' ', bare_stacktrace); - writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace); - } - - LOG_FATAL(log, fmt::runtime(bare_stacktrace.str())); - bare_stacktrace_str = bare_stacktrace.str(); - } - - /// Now try to access potentially unsafe data in thread_ptr. - - String query_id; - String query; - - /// Send logs from this thread to client if possible. - /// It will allow client to see failure messages directly. - if (thread_ptr) - { - query_id = thread_ptr->getQueryId(); - query = thread_ptr->getQueryForLog(); - - if (auto logs_queue = thread_ptr->getInternalTextLogsQueue()) - { - CurrentThread::attachInternalTextLogsQueue(logs_queue, LogsLevel::trace); - } - } - - LOG_FATAL(log, "########################################"); - - if (query_id.empty()) - { - LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (no query) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, - thread_num, signal_description, sig); - } - else - { - LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, - thread_num, query_id, query, signal_description, sig); - } - - LOG_FATAL(log, fmt::runtime(error_message)); - - if (!bare_stacktrace_str.empty()) - { - LOG_FATAL(log, fmt::runtime(bare_stacktrace_str)); - } - - /// Write symbolized stack trace line by line for better grep-ability. 
- stack_trace.toStringEveryLine([&](std::string_view s) { LOG_FATAL(log, fmt::runtime(s)); }); - - /// In case it's a scheduled job write all previous jobs origins call stacks - std::for_each(thread_frame_pointers.rbegin(), thread_frame_pointers.rend(), - [this](const StackTrace::FramePointers & frame_pointers) - { - if (size_t size = std::ranges::find(frame_pointers, nullptr) - frame_pointers.begin()) - { - LOG_FATAL(log, "========================================"); - WriteBufferFromOwnString bare_stacktrace; - writeString("Job's origin stack trace:", bare_stacktrace); - std::for_each_n(frame_pointers.begin(), size, - [&bare_stacktrace](const void * ptr) - { - writeChar(' ', bare_stacktrace); - writePointerHex(ptr, bare_stacktrace); - } - ); - - LOG_FATAL(log, fmt::runtime(bare_stacktrace.str())); - - StackTrace::toStringEveryLine(const_cast(frame_pointers.data()), 0, size, [this](std::string_view s) { LOG_FATAL(log, fmt::runtime(s)); }); - } - } - ); - - -#if defined(OS_LINUX) - /// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace. - /// Please keep the below log messages in-sync with the ones in programs/server/Server.cpp - - if (daemon.stored_binary_hash.empty()) - { - LOG_FATAL(log, "Integrity check of the executable skipped because the reference checksum could not be read."); - } - else - { - String calculated_binary_hash = getHashOfLoadedBinaryHex(); - if (calculated_binary_hash == daemon.stored_binary_hash) - { - LOG_FATAL(log, "Integrity check of the executable successfully passed (checksum: {})", calculated_binary_hash); - } - else - { - LOG_FATAL( - log, - "Calculated checksum of the executable ({0}) does not correspond" - " to the reference checksum stored in the executable ({1})." - " This may indicate one of the following:" - " - the executable was changed just after startup;" - " - the executable was corrupted on disk due to faulty hardware;" - " - the loaded executable was corrupted in memory due to faulty hardware;" - " - the file was intentionally modified;" - " - a logical error in the code.", - calculated_binary_hash, - daemon.stored_binary_hash); - } - } -#endif - - /// Write crash to system.crash_log table if available. - if (collectCrashLog) - collectCrashLog(sig, thread_num, query_id, stack_trace); - - Context::getGlobalContextInstance()->handleCrash(); - - /// Send crash report to developers (if configured) - if (sig != SanitizerTrap) - { - if (auto * sentry = SentryWriter::getInstance()) - sentry->onSignal(sig, error_message, stack_trace.getFramePointers(), stack_trace.getOffset(), stack_trace.getSize()); - - /// Advice the user to send it manually. - if (std::string_view(VERSION_OFFICIAL).contains("official build")) - { - const auto & date_lut = DateLUT::instance(); - - /// Approximate support period, upper bound. - if (time(nullptr) - date_lut.makeDate(2000 + VERSION_MAJOR, VERSION_MINOR, 1) < (365 + 30) * 86400) - { - LOG_FATAL(log, "Report this error to https://github.com/ClickHouse/ClickHouse/issues"); - } - else - { - LOG_FATAL(log, "ClickHouse version {} is old and should be upgraded to the latest version.", VERSION_STRING); - } - } - else - { - LOG_FATAL(log, "This ClickHouse version is not official and should be upgraded to the official build."); - } - } - - /// List changed settings. 
- if (!query_id.empty()) - { - ContextPtr query_context = thread_ptr->getQueryContext(); - if (query_context) - { - String changed_settings = query_context->getSettingsRef().toString(); - - if (changed_settings.empty()) - LOG_FATAL(log, "No settings were changed"); - else - LOG_FATAL(log, "Changed settings: {}", changed_settings); - } - } - - /// When everything is done, we will try to send these error messages to the client. - if (thread_ptr) - thread_ptr->onFatalError(); - - fatal_error_printed.test_and_set(); - } - catch (...) - { - /// onFault is called from the std::thread, and it should catch all exceptions; otherwise, you can get unrelated fatal errors. - PreformattedMessage message = getCurrentExceptionMessageAndPattern(true); - LOG_FATAL(getLogger(__PRETTY_FUNCTION__), message); - } -}; - - -#if defined(SANITIZER) - -template -struct ValueHolder -{ - ValueHolder(T value_) : value(value_) - {} - - T value; -}; - -extern "C" void __sanitizer_set_death_callback(void (*)()); - -/// Sanitizers may not expect some function calls from death callback. -/// Let's try to disable instrumentation to avoid possible issues. -/// However, this callback may call other functions that are still instrumented. -/// We can try [[clang::always_inline]] attribute for statements in future (available in clang-15) -/// See https://github.com/google/sanitizers/issues/1543 and https://github.com/google/sanitizers/issues/1549. -static DISABLE_SANITIZER_INSTRUMENTATION void sanitizerDeathCallback() -{ - DENY_ALLOCATIONS_IN_SCOPE; - /// Also need to send data via pipe. Otherwise it may lead to deadlocks or failures in printing diagnostic info. - - char buf[signal_pipe_buf_size]; - WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf); - - const StackTrace stack_trace; - - writeBinary(SignalListener::SanitizerTrap, out); - writePODBinary(stack_trace, out); - /// We create a dummy struct with a constructor so DISABLE_SANITIZER_INSTRUMENTATION is not applied to it - /// otherwise, Memory sanitizer can't know that values initiialized inside this function are actually initialized - /// because instrumentations are disabled leading to false positives later on - ValueHolder thread_id{static_cast(getThreadId())}; - writeBinary(thread_id.value, out); - writePODBinary(current_thread, out); - - out.next(); - - /// The time that is usually enough for separate thread to print info into log. - sleepForSeconds(20); -} -#endif - - -/** To use with std::set_terminate. - * Collects slightly more info than __gnu_cxx::__verbose_terminate_handler, - * and send it to pipe. Other thread will read this info from pipe and asynchronously write it to log. - * Look at libstdc++-v3/libsupc++/vterminate.cc for example. - */ -[[noreturn]] static void terminate_handler() -{ - static thread_local bool terminating = false; - if (terminating) - abort(); - - terminating = true; - - std::string log_message; - - if (std::current_exception()) - log_message = "Terminate called for uncaught exception:\n" + getCurrentExceptionMessage(true); - else - log_message = "Terminate called without an active exception"; - - /// POSIX.1 says that write(2)s of less than PIPE_BUF bytes must be atomic - man 7 pipe - /// And the buffer should not be too small because our exception messages can be large. 
- static constexpr size_t buf_size = PIPE_BUF; - - if (log_message.size() > buf_size - 16) - log_message.resize(buf_size - 16); - - char buf[buf_size]; - WriteBufferFromFileDescriptor out(signal_pipe.fds_rw[1], buf_size, buf); - - writeBinary(static_cast(SignalListener::StdTerminate), out); - writeBinary(static_cast(getThreadId()), out); - writeBinary(log_message, out); - out.next(); - - abort(); -} - - static std::string createDirectory(const std::string & file) { fs::path path = fs::path(file).parent_path(); @@ -697,22 +148,7 @@ BaseDaemon::~BaseDaemon() { writeSignalIDtoSignalPipe(SignalListener::StopThread); signal_listener_thread.join(); - /// Reset signals to SIG_DFL to avoid trying to write to the signal_pipe that will be closed after. - for (int sig : handled_signals) - if (SIG_ERR == signal(sig, SIG_DFL)) - { - try - { - throw ErrnoException(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler"); - } - catch (ErrnoException &) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - signal_pipe.close(); - + HandledSignals::instance().reset(); SentryWriter::resetInstance(); } @@ -751,6 +187,8 @@ void BaseDaemon::closeFDs() #else fs::path proc_path{"/proc/self/fd"}; #endif + + const auto & signal_pipe = HandledSignals::instance().signal_pipe; if (fs::is_directory(proc_path)) /// Hooray, proc exists { /// in /proc/self/fd directory filenames are numeric file descriptors. @@ -971,56 +409,6 @@ void BaseDaemon::initialize(Application & self) } -static void addSignalHandler(const std::vector & signals, signal_function handler, std::vector * out_handled_signals) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO; - -#if defined(OS_DARWIN) - sigemptyset(&sa.sa_mask); - for (auto signal : signals) - sigaddset(&sa.sa_mask, signal); -#else - if (sigemptyset(&sa.sa_mask)) - throw Poco::Exception("Cannot set signal handler."); - - for (auto signal : signals) - if (sigaddset(&sa.sa_mask, signal)) - throw Poco::Exception("Cannot set signal handler."); -#endif - - for (auto signal : signals) - if (sigaction(signal, &sa, nullptr)) - throw Poco::Exception("Cannot set signal handler."); - - if (out_handled_signals) - std::copy(signals.begin(), signals.end(), std::back_inserter(*out_handled_signals)); -} - - -static void blockSignals(const std::vector & signals) -{ - sigset_t sig_set; - -#if defined(OS_DARWIN) - sigemptyset(&sig_set); - for (auto signal : signals) - sigaddset(&sig_set, signal); -#else - if (sigemptyset(&sig_set)) - throw Poco::Exception("Cannot block signal."); - - for (auto signal : signals) - if (sigaddset(&sig_set, signal)) - throw Poco::Exception("Cannot block signal."); -#endif - - if (pthread_sigmask(SIG_BLOCK, &sig_set, nullptr)) - throw Poco::Exception("Cannot block signal."); -} - extern const char * GIT_HASH; void BaseDaemon::initializeTerminationAndSignalProcessing() @@ -1044,29 +432,21 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() }; } } - std::set_terminate(terminate_handler); /// We want to avoid SIGPIPE when working with sockets and pipes, and just handle return value/errno instead. blockSignals({SIGPIPE}); /// Setup signal handlers. - /// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime. 
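// A rough sketch of how the daemon now wires up signal handling, inferred only from the call
// sites visible just below: the per-daemon plumbing removed above (signal_pipe, signalHandler,
// SignalListener, terminate_handler, addSignalHandler/blockSignals helpers) is consolidated into
// a shared HandledSignals facility accessed through instance(); its definition is outside this
// hunk, so argument meanings are read off these calls and may differ in the real header.
//
//     HandledSignals::instance().setupTerminateHandler();                     // replaces the std::set_terminate(terminate_handler) call removed above
//     HandledSignals::instance().setupCommonDeadlySignalHandlers();           // replaces addSignalHandler({SIGABRT, SIGSEGV, ...}, signalHandler, ...)
//     HandledSignals::instance().setupCommonTerminateRequestSignalHandlers(); // replaces addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, ...)
//     HandledSignals::instance().addSignalHandler({SIGHUP}, closeLogsSignalHandler, true);
//
//     HandledSignals::instance().reset();  // in ~BaseDaemon, replaces the manual SIG_DFL restore and signal_pipe.close()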
- addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, &handled_signals); - addSignalHandler({SIGHUP}, closeLogsSignalHandler, &handled_signals); - addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, &handled_signals); - -#if defined(SANITIZER) - __sanitizer_set_death_callback(sanitizerDeathCallback); -#endif + HandledSignals::instance().setupTerminateHandler(); + HandledSignals::instance().setupCommonDeadlySignalHandlers(); + HandledSignals::instance().setupCommonTerminateRequestSignalHandlers(); + HandledSignals::instance().addSignalHandler({SIGHUP}, closeLogsSignalHandler, true); /// Set up Poco ErrorHandler for Poco Threads. static KillingErrorHandler killing_error_handler; Poco::ErrorHandler::set(&killing_error_handler); - signal_pipe.setNonBlockingWrite(); - signal_pipe.tryIncreaseSize(1 << 20); - - signal_listener = std::make_unique(*this); + signal_listener = std::make_unique(this, getLogger("BaseDaemon")); signal_listener_thread.start(*signal_listener); #if defined(__ELF__) && !defined(OS_FREEBSD) @@ -1272,7 +652,7 @@ void BaseDaemon::setupWatchdog() /// Forward signals to the child process. if (forward_signals) { - addSignalHandler( + HandledSignals::instance().addSignalHandler( {SIGHUP, SIGINT, SIGQUIT, SIGTERM}, [](int sig, siginfo_t *, void *) { @@ -1288,7 +668,7 @@ void BaseDaemon::setupWatchdog() (void)res; } }, - nullptr); + false); } else { @@ -1305,6 +685,10 @@ void BaseDaemon::setupWatchdog() int status = 0; do { + // Close log files to prevent keeping descriptors of unlinked rotated files. + // On next log write files will be reopened. + closeLogs(logger()); + if (-1 != waitpid(pid, &status, WUNTRACED | WCONTINUED) || errno == ECHILD) { if (WIFSTOPPED(status)) diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index 3d34d404595..b15aa74fcf3 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -168,8 +168,6 @@ protected: String git_hash; String stored_binary_hash; - std::vector handled_signals; - bool should_setup_watchdog = false; char * argv0 = nullptr; }; diff --git a/src/DataTypes/DataTypeDate.cpp b/src/DataTypes/DataTypeDate.cpp index ee4b0065e59..0a7aa7deac6 100644 --- a/src/DataTypes/DataTypeDate.cpp +++ b/src/DataTypes/DataTypeDate.cpp @@ -17,7 +17,7 @@ SerializationPtr DataTypeDate::doGetDefaultSerialization() const void registerDataTypeDate(DataTypeFactory & factory) { - factory.registerSimpleDataType("Date", [] { return DataTypePtr(std::make_shared()); }, DataTypeFactory::CaseInsensitive); + factory.registerSimpleDataType("Date", [] { return DataTypePtr(std::make_shared()); }, DataTypeFactory::Case::Insensitive); } } diff --git a/src/DataTypes/DataTypeDate32.cpp b/src/DataTypes/DataTypeDate32.cpp index 343e498d303..b2b8e7c0c1c 100644 --- a/src/DataTypes/DataTypeDate32.cpp +++ b/src/DataTypes/DataTypeDate32.cpp @@ -24,7 +24,7 @@ Field DataTypeDate32::getDefault() const void registerDataTypeDate32(DataTypeFactory & factory) { factory.registerSimpleDataType( - "Date32", [] { return DataTypePtr(std::make_shared()); }, DataTypeFactory::CaseInsensitive); + "Date32", [] { return DataTypePtr(std::make_shared()); }, DataTypeFactory::Case::Insensitive); } } diff --git a/src/DataTypes/DataTypeDecimalBase.cpp b/src/DataTypes/DataTypeDecimalBase.cpp index 62218694924..2adfda5bb27 100644 --- a/src/DataTypes/DataTypeDecimalBase.cpp +++ b/src/DataTypes/DataTypeDecimalBase.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git 
a/src/DataTypes/DataTypeDomainBool.cpp b/src/DataTypes/DataTypeDomainBool.cpp index 3d19b6262d8..30dbba2d8c0 100644 --- a/src/DataTypes/DataTypeDomainBool.cpp +++ b/src/DataTypes/DataTypeDomainBool.cpp @@ -15,8 +15,8 @@ void registerDataTypeDomainBool(DataTypeFactory & factory) std::make_unique("Bool"), std::make_unique(type->getDefaultSerialization()))); }); - factory.registerAlias("bool", "Bool", DataTypeFactory::CaseInsensitive); - factory.registerAlias("boolean", "Bool", DataTypeFactory::CaseInsensitive); + factory.registerAlias("bool", "Bool", DataTypeFactory::Case::Insensitive); + factory.registerAlias("boolean", "Bool", DataTypeFactory::Case::Insensitive); } } diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index a1d5e4b39b7..08e0c0d2045 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -318,7 +318,7 @@ void registerDataTypeEnum(DataTypeFactory & factory) factory.registerDataType("Enum", create); /// MySQL - factory.registerAlias("ENUM", "Enum", DataTypeFactory::CaseInsensitive); + factory.registerAlias("ENUM", "Enum", DataTypeFactory::Case::Insensitive); } } diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 8c8f9999ada..af37cde2846 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -174,7 +175,7 @@ DataTypePtr DataTypeFactory::getCustom(DataTypeCustomDescPtr customization) cons } -void DataTypeFactory::registerDataType(const String & family_name, Value creator, CaseSensitiveness case_sensitiveness) +void DataTypeFactory::registerDataType(const String & family_name, Value creator, Case case_sensitiveness) { if (creator == nullptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "DataTypeFactory: the data type family {} has been provided a null constructor", family_name); @@ -188,12 +189,12 @@ void DataTypeFactory::registerDataType(const String & family_name, Value creator throw Exception(ErrorCodes::LOGICAL_ERROR, "DataTypeFactory: the data type family name '{}' is not unique", family_name); - if (case_sensitiveness == CaseInsensitive + if (case_sensitiveness == Case::Insensitive && !case_insensitive_data_types.emplace(family_name_lowercase, creator).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "DataTypeFactory: the case insensitive data type family name '{}' is not unique", family_name); } -void DataTypeFactory::registerSimpleDataType(const String & name, SimpleCreator creator, CaseSensitiveness case_sensitiveness) +void DataTypeFactory::registerSimpleDataType(const String & name, SimpleCreator creator, Case case_sensitiveness) { if (creator == nullptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "DataTypeFactory: the data type {} has been provided a null constructor", @@ -207,7 +208,7 @@ void DataTypeFactory::registerSimpleDataType(const String & name, SimpleCreator }, case_sensitiveness); } -void DataTypeFactory::registerDataTypeCustom(const String & family_name, CreatorWithCustom creator, CaseSensitiveness case_sensitiveness) +void DataTypeFactory::registerDataTypeCustom(const String & family_name, CreatorWithCustom creator, Case case_sensitiveness) { registerDataType(family_name, [creator](const ASTPtr & ast) { @@ -218,7 +219,7 @@ void DataTypeFactory::registerDataTypeCustom(const String & family_name, Creator }, case_sensitiveness); } -void DataTypeFactory::registerSimpleDataTypeCustom(const String & name, SimpleCreatorWithCustom creator, CaseSensitiveness 
case_sensitiveness) +void DataTypeFactory::registerSimpleDataTypeCustom(const String & name, SimpleCreatorWithCustom creator, Case case_sensitiveness) { registerDataTypeCustom(name, [name, creator](const ASTPtr & ast) { diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index 86e0203358d..edba9886d1c 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -41,16 +41,16 @@ public: DataTypePtr tryGet(const ASTPtr & ast) const; /// Register a type family by its name. - void registerDataType(const String & family_name, Value creator, CaseSensitiveness case_sensitiveness = CaseSensitive); + void registerDataType(const String & family_name, Value creator, Case case_sensitiveness = Case::Sensitive); /// Register a simple data type, that have no parameters. - void registerSimpleDataType(const String & name, SimpleCreator creator, CaseSensitiveness case_sensitiveness = CaseSensitive); + void registerSimpleDataType(const String & name, SimpleCreator creator, Case case_sensitiveness = Case::Sensitive); /// Register a customized type family - void registerDataTypeCustom(const String & family_name, CreatorWithCustom creator, CaseSensitiveness case_sensitiveness = CaseSensitive); + void registerDataTypeCustom(const String & family_name, CreatorWithCustom creator, Case case_sensitiveness = Case::Sensitive); /// Register a simple customized data type - void registerSimpleDataTypeCustom(const String & name, SimpleCreatorWithCustom creator, CaseSensitiveness case_sensitiveness = CaseSensitive); + void registerSimpleDataTypeCustom(const String & name, SimpleCreatorWithCustom creator, Case case_sensitiveness = Case::Sensitive); private: template diff --git a/src/DataTypes/DataTypeFixedString.cpp b/src/DataTypes/DataTypeFixedString.cpp index 85af59e852d..080ff8826a5 100644 --- a/src/DataTypes/DataTypeFixedString.cpp +++ b/src/DataTypes/DataTypeFixedString.cpp @@ -64,7 +64,7 @@ void registerDataTypeFixedString(DataTypeFactory & factory) factory.registerDataType("FixedString", create); /// Compatibility alias. 
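// A condensed view of the rename driving the registration edits that follow: DataTypeFactory's
// unscoped CaseSensitiveness enum becomes the scoped DataTypeFactory::Case, so the default
// arguments in the header hunk above and every registration site spell the enum through the new
// name. Illustrative before/after, taken from the lines of this diff:
//
//     // before
//     factory.registerAlias("BINARY", "FixedString", DataTypeFactory::CaseInsensitive);
//     // after
//     factory.registerAlias("BINARY", "FixedString", DataTypeFactory::Case::Insensitive);
//
// The per-type hunks below apply the same mechanical substitution.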
- factory.registerAlias("BINARY", "FixedString", DataTypeFactory::CaseInsensitive); + factory.registerAlias("BINARY", "FixedString", DataTypeFactory::Case::Insensitive); } } diff --git a/src/DataTypes/DataTypeIPv4andIPv6.cpp b/src/DataTypes/DataTypeIPv4andIPv6.cpp index 4c0b45f472a..de11cc50107 100644 --- a/src/DataTypes/DataTypeIPv4andIPv6.cpp +++ b/src/DataTypes/DataTypeIPv4andIPv6.cpp @@ -9,9 +9,9 @@ namespace DB void registerDataTypeIPv4andIPv6(DataTypeFactory & factory) { factory.registerSimpleDataType("IPv4", [] { return DataTypePtr(std::make_shared()); }); - factory.registerAlias("INET4", "IPv4", DataTypeFactory::CaseInsensitive); + factory.registerAlias("INET4", "IPv4", DataTypeFactory::Case::Insensitive); factory.registerSimpleDataType("IPv6", [] { return DataTypePtr(std::make_shared()); }); - factory.registerAlias("INET6", "IPv6", DataTypeFactory::CaseInsensitive); + factory.registerAlias("INET6", "IPv6", DataTypeFactory::Case::Insensitive); } } diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index 720436d0e0d..5636a46373f 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -76,7 +76,7 @@ void registerDataTypeObject(DataTypeFactory & factory) factory.registerDataType("Object", create); factory.registerSimpleDataType("JSON", [] { return std::make_shared("JSON", false); }, - DataTypeFactory::CaseInsensitive); + DataTypeFactory::Case::Insensitive); } } diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index 95e49420009..ca65fb42cc8 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -62,38 +62,38 @@ void registerDataTypeString(DataTypeFactory & factory) /// These synonims are added for compatibility. - factory.registerAlias("CHAR", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("NCHAR", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("CHARACTER", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("VARCHAR", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("NVARCHAR", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("VARCHAR2", "String", DataTypeFactory::CaseInsensitive); /// Oracle - factory.registerAlias("TEXT", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("TINYTEXT", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("MEDIUMTEXT", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("LONGTEXT", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("BLOB", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("CLOB", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("TINYBLOB", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("MEDIUMBLOB", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("LONGBLOB", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("BYTEA", "String", DataTypeFactory::CaseInsensitive); /// PostgreSQL + factory.registerAlias("CHAR", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("NCHAR", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("CHARACTER", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("VARCHAR", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("NVARCHAR", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("VARCHAR2", "String", 
DataTypeFactory::Case::Insensitive); /// Oracle + factory.registerAlias("TEXT", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("TINYTEXT", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("MEDIUMTEXT", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("LONGTEXT", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("BLOB", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("CLOB", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("TINYBLOB", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("MEDIUMBLOB", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("LONGBLOB", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("BYTEA", "String", DataTypeFactory::Case::Insensitive); /// PostgreSQL - factory.registerAlias("CHARACTER LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("CHARACTER VARYING", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("CHAR LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("CHAR VARYING", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("NATIONAL CHAR", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("NATIONAL CHARACTER", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("NATIONAL CHARACTER LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("NATIONAL CHARACTER VARYING", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("NATIONAL CHAR VARYING", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("NCHAR VARYING", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("NCHAR LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("BINARY LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("BINARY VARYING", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("VARBINARY", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("GEOMETRY", "String", DataTypeFactory::CaseInsensitive); //mysql + factory.registerAlias("CHARACTER LARGE OBJECT", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("CHARACTER VARYING", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("CHAR LARGE OBJECT", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("CHAR VARYING", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("NATIONAL CHAR", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("NATIONAL CHARACTER", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("NATIONAL CHARACTER LARGE OBJECT", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("NATIONAL CHARACTER VARYING", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("NATIONAL CHAR VARYING", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("NCHAR VARYING", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("NCHAR LARGE OBJECT", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("BINARY LARGE OBJECT", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("BINARY VARYING", "String", DataTypeFactory::Case::Insensitive); + factory.registerAlias("VARBINARY", "String", DataTypeFactory::Case::Insensitive); + 
factory.registerAlias("GEOMETRY", "String", DataTypeFactory::Case::Insensitive); //mysql } } diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index 77a7a3e7237..a427fd0717a 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -364,15 +364,15 @@ template class DataTypeDecimal; void registerDataTypeDecimal(DataTypeFactory & factory) { - factory.registerDataType("Decimal32", createExact, DataTypeFactory::CaseInsensitive); - factory.registerDataType("Decimal64", createExact, DataTypeFactory::CaseInsensitive); - factory.registerDataType("Decimal128", createExact, DataTypeFactory::CaseInsensitive); - factory.registerDataType("Decimal256", createExact, DataTypeFactory::CaseInsensitive); + factory.registerDataType("Decimal32", createExact, DataTypeFactory::Case::Insensitive); + factory.registerDataType("Decimal64", createExact, DataTypeFactory::Case::Insensitive); + factory.registerDataType("Decimal128", createExact, DataTypeFactory::Case::Insensitive); + factory.registerDataType("Decimal256", createExact, DataTypeFactory::Case::Insensitive); - factory.registerDataType("Decimal", create, DataTypeFactory::CaseInsensitive); - factory.registerAlias("DEC", "Decimal", DataTypeFactory::CaseInsensitive); - factory.registerAlias("NUMERIC", "Decimal", DataTypeFactory::CaseInsensitive); - factory.registerAlias("FIXED", "Decimal", DataTypeFactory::CaseInsensitive); + factory.registerDataType("Decimal", create, DataTypeFactory::Case::Insensitive); + factory.registerAlias("DEC", "Decimal", DataTypeFactory::Case::Insensitive); + factory.registerAlias("NUMERIC", "Decimal", DataTypeFactory::Case::Insensitive); + factory.registerAlias("FIXED", "Decimal", DataTypeFactory::Case::Insensitive); } } diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index 99446d24eed..72020b0a5aa 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -65,41 +65,41 @@ void registerDataTypeNumbers(DataTypeFactory & factory) /// These synonyms are added for compatibility. 
- factory.registerAlias("TINYINT", "Int8", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INT1", "Int8", DataTypeFactory::CaseInsensitive); - factory.registerAlias("BYTE", "Int8", DataTypeFactory::CaseInsensitive); - factory.registerAlias("TINYINT SIGNED", "Int8", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INT1 SIGNED", "Int8", DataTypeFactory::CaseInsensitive); - factory.registerAlias("SMALLINT", "Int16", DataTypeFactory::CaseInsensitive); - factory.registerAlias("SMALLINT SIGNED", "Int16", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INT", "Int32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INTEGER", "Int32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("MEDIUMINT", "Int32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("MEDIUMINT SIGNED", "Int32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INT SIGNED", "Int32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INTEGER SIGNED", "Int32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("BIGINT", "Int64", DataTypeFactory::CaseInsensitive); - factory.registerAlias("SIGNED", "Int64", DataTypeFactory::CaseInsensitive); - factory.registerAlias("BIGINT SIGNED", "Int64", DataTypeFactory::CaseInsensitive); - factory.registerAlias("TIME", "Int64", DataTypeFactory::CaseInsensitive); + factory.registerAlias("TINYINT", "Int8", DataTypeFactory::Case::Insensitive); + factory.registerAlias("INT1", "Int8", DataTypeFactory::Case::Insensitive); + factory.registerAlias("BYTE", "Int8", DataTypeFactory::Case::Insensitive); + factory.registerAlias("TINYINT SIGNED", "Int8", DataTypeFactory::Case::Insensitive); + factory.registerAlias("INT1 SIGNED", "Int8", DataTypeFactory::Case::Insensitive); + factory.registerAlias("SMALLINT", "Int16", DataTypeFactory::Case::Insensitive); + factory.registerAlias("SMALLINT SIGNED", "Int16", DataTypeFactory::Case::Insensitive); + factory.registerAlias("INT", "Int32", DataTypeFactory::Case::Insensitive); + factory.registerAlias("INTEGER", "Int32", DataTypeFactory::Case::Insensitive); + factory.registerAlias("MEDIUMINT", "Int32", DataTypeFactory::Case::Insensitive); + factory.registerAlias("MEDIUMINT SIGNED", "Int32", DataTypeFactory::Case::Insensitive); + factory.registerAlias("INT SIGNED", "Int32", DataTypeFactory::Case::Insensitive); + factory.registerAlias("INTEGER SIGNED", "Int32", DataTypeFactory::Case::Insensitive); + factory.registerAlias("BIGINT", "Int64", DataTypeFactory::Case::Insensitive); + factory.registerAlias("SIGNED", "Int64", DataTypeFactory::Case::Insensitive); + factory.registerAlias("BIGINT SIGNED", "Int64", DataTypeFactory::Case::Insensitive); + factory.registerAlias("TIME", "Int64", DataTypeFactory::Case::Insensitive); - factory.registerAlias("TINYINT UNSIGNED", "UInt8", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INT1 UNSIGNED", "UInt8", DataTypeFactory::CaseInsensitive); - factory.registerAlias("SMALLINT UNSIGNED", "UInt16", DataTypeFactory::CaseInsensitive); - factory.registerAlias("YEAR", "UInt16", DataTypeFactory::CaseInsensitive); - factory.registerAlias("MEDIUMINT UNSIGNED", "UInt32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INT UNSIGNED", "UInt32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INTEGER UNSIGNED", "UInt32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("UNSIGNED", "UInt64", DataTypeFactory::CaseInsensitive); - factory.registerAlias("BIGINT UNSIGNED", "UInt64", DataTypeFactory::CaseInsensitive); - 
factory.registerAlias("BIT", "UInt64", DataTypeFactory::CaseInsensitive); - factory.registerAlias("SET", "UInt64", DataTypeFactory::CaseInsensitive); + factory.registerAlias("TINYINT UNSIGNED", "UInt8", DataTypeFactory::Case::Insensitive); + factory.registerAlias("INT1 UNSIGNED", "UInt8", DataTypeFactory::Case::Insensitive); + factory.registerAlias("SMALLINT UNSIGNED", "UInt16", DataTypeFactory::Case::Insensitive); + factory.registerAlias("YEAR", "UInt16", DataTypeFactory::Case::Insensitive); + factory.registerAlias("MEDIUMINT UNSIGNED", "UInt32", DataTypeFactory::Case::Insensitive); + factory.registerAlias("INT UNSIGNED", "UInt32", DataTypeFactory::Case::Insensitive); + factory.registerAlias("INTEGER UNSIGNED", "UInt32", DataTypeFactory::Case::Insensitive); + factory.registerAlias("UNSIGNED", "UInt64", DataTypeFactory::Case::Insensitive); + factory.registerAlias("BIGINT UNSIGNED", "UInt64", DataTypeFactory::Case::Insensitive); + factory.registerAlias("BIT", "UInt64", DataTypeFactory::Case::Insensitive); + factory.registerAlias("SET", "UInt64", DataTypeFactory::Case::Insensitive); - factory.registerAlias("FLOAT", "Float32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("REAL", "Float32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("SINGLE", "Float32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("DOUBLE", "Float64", DataTypeFactory::CaseInsensitive); - factory.registerAlias("DOUBLE PRECISION", "Float64", DataTypeFactory::CaseInsensitive); + factory.registerAlias("FLOAT", "Float32", DataTypeFactory::Case::Insensitive); + factory.registerAlias("REAL", "Float32", DataTypeFactory::Case::Insensitive); + factory.registerAlias("SINGLE", "Float32", DataTypeFactory::Case::Insensitive); + factory.registerAlias("DOUBLE", "Float64", DataTypeFactory::Case::Insensitive); + factory.registerAlias("DOUBLE PRECISION", "Float64", DataTypeFactory::Case::Insensitive); } /// Explicit template instantiations. diff --git a/src/DataTypes/EnumValues.h b/src/DataTypes/EnumValues.h index 889878bc60f..9762ff5870f 100644 --- a/src/DataTypes/EnumValues.h +++ b/src/DataTypes/EnumValues.h @@ -7,7 +7,7 @@ namespace DB { -namespace ErrorCodesEnumValues +namespace ErrorCodes { extern const int BAD_ARGUMENTS; } diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 8ceab17cba4..03b5d9584e0 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -11,6 +11,7 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; } struct SerializationVariantElement::DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState @@ -188,13 +189,6 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( assert_cast(*variant_element_state->variant->assumeMutable()).nestedRemoveNullable(); } - /// If nothing to deserialize, just insert defaults. 
- if (variant_limit == 0) - { - mutable_column->insertManyDefaults(num_new_discriminators); - return; - } - addVariantToPath(settings.path); nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, *variant_limit, settings, variant_element_state->variant_element_state, cache); removeVariantFromPath(settings.path); @@ -209,6 +203,17 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( return; } + /// If there was nothing to deserialize or nothing was actually deserialized when variant_limit > 0, just insert defaults. + /// The second case means that we don't have a stream for such sub-column. It may happen during ALTER MODIFY column with Variant extension. + if (variant_limit == 0 || variant_element_state->variant->empty()) + { + mutable_column->insertManyDefaults(num_new_discriminators); + return; + } + + if (variant_element_state->variant->size() < *variant_limit) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of deserialized variant column less than the limit: {} < {}", variant_element_state->variant->size(), *variant_limit); + size_t variant_offset = variant_element_state->variant->size() - *variant_limit; /// If we have only our discriminator in range, insert the whole range to result column. diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index 802356cc108..84a52d4affb 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ b/src/DataTypes/registerDataTypeDateTime.cpp @@ -108,11 +108,11 @@ static DataTypePtr create64(const ASTPtr & arguments) void registerDataTypeDateTime(DataTypeFactory & factory) { - factory.registerDataType("DateTime", create, DataTypeFactory::CaseInsensitive); - factory.registerDataType("DateTime32", create32, DataTypeFactory::CaseInsensitive); - factory.registerDataType("DateTime64", create64, DataTypeFactory::CaseInsensitive); + factory.registerDataType("DateTime", create, DataTypeFactory::Case::Insensitive); + factory.registerDataType("DateTime32", create32, DataTypeFactory::Case::Insensitive); + factory.registerDataType("DateTime64", create64, DataTypeFactory::Case::Insensitive); - factory.registerAlias("TIMESTAMP", "DateTime", DataTypeFactory::CaseInsensitive); + factory.registerAlias("TIMESTAMP", "DateTime", DataTypeFactory::Case::Insensitive); } } diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index c85e8f5688a..d149b49d465 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -79,13 +80,20 @@ namespace /// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query. 
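// The function below is the part of this visitor that changes: a materialized view's TO table
// used to be read from the single create.to_table_id and is now collected from the list
// create.targets->targets. A trimmed sketch of the new dependency collection, restating the
// '+' lines of this hunk in readable form:
//
//     if (create.targets)
//     {
//         for (const auto & target : create.targets->targets)
//         {
//             const auto & table_id = target.table_id;
//             if (table_id.table_name.empty())
//                 continue;
//             /// TO target_table (for materialized views)
//             QualifiedTableName target_name{table_id.database_name, table_id.table_name};
//             if (target_name.database.empty())
//                 target_name.database = current_database;
//             dependencies.emplace(target_name);
//         }
//     }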
void visitCreateQuery(const ASTCreateQuery & create) { - QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name}; - if (!to_table.table.empty()) + if (create.targets) { - /// TO target_table (for materialized views) - if (to_table.database.empty()) - to_table.database = current_database; - dependencies.emplace(to_table); + for (const auto & target : create.targets->targets) + { + const auto & table_id = target.table_id; + if (!table_id.table_name.empty()) + { + /// TO target_table (for materialized views) + QualifiedTableName target_name{table_id.database_name, table_id.table_name}; + if (target_name.database.empty()) + target_name.database = current_database; + dependencies.emplace(target_name); + } + } } QualifiedTableName as_table{create.as_database, create.as_table}; diff --git a/src/Databases/DDLRenamingVisitor.cpp b/src/Databases/DDLRenamingVisitor.cpp index 6cd414635a0..38e100e2470 100644 --- a/src/Databases/DDLRenamingVisitor.cpp +++ b/src/Databases/DDLRenamingVisitor.cpp @@ -86,12 +86,19 @@ namespace create.as_table = as_table_new.table; } - QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name}; - if (!to_table.table.empty() && !to_table.database.empty()) + if (create.targets) { - auto to_table_new = data.renaming_map.getNewTableName(to_table); - if (to_table_new != to_table) - create.to_table_id = StorageID{to_table_new.database, to_table_new.table}; + for (auto & target : create.targets->targets) + { + auto & table_id = target.table_id; + if (!table_id.database_name.empty() && !table_id.table_name.empty()) + { + QualifiedTableName target_name{table_id.database_name, table_id.table_name}; + auto new_target_name = data.renaming_map.getNewTableName(target_name); + if (new_target_name != target_name) + table_id = StorageID{new_target_name.database, new_target_name.table}; + } + } } } diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index f58b90f1134..d86e29ca915 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -17,6 +17,7 @@ #include #include #include +#include namespace fs = std::filesystem; @@ -38,8 +39,10 @@ namespace ErrorCodes class AtomicDatabaseTablesSnapshotIterator final : public DatabaseTablesSnapshotIterator { public: - explicit AtomicDatabaseTablesSnapshotIterator(DatabaseTablesSnapshotIterator && base) - : DatabaseTablesSnapshotIterator(std::move(base)) {} + explicit AtomicDatabaseTablesSnapshotIterator(DatabaseTablesSnapshotIterator && base) noexcept + : DatabaseTablesSnapshotIterator(std::move(base)) + { + } UUID uuid() const override { return table()->getStorageID().uuid; } }; @@ -110,12 +113,12 @@ StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & // it is important to call the destructors of not_in_use without // locked mutex to avoid potential deadlock. 
DetachedTables not_in_use; - StoragePtr table; + StoragePtr detached_table; { std::lock_guard lock(mutex); - table = DatabaseOrdinary::detachTableUnlocked(name); + detached_table = DatabaseOrdinary::detachTableUnlocked(name); table_name_to_path.erase(name); - detached_tables.emplace(table->getStorageID().uuid, table); + detached_tables.emplace(detached_table->getStorageID().uuid, detached_table); not_in_use = cleanupDetachedTables(); } @@ -125,7 +128,7 @@ StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & LOG_DEBUG(log, "Finished removing not used detached tables"); } - return table; + return detached_table; } void DatabaseAtomic::dropTable(ContextPtr local_context, const String & table_name, bool sync) diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index b59edd479ba..4a4ccfa2573 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -1,7 +1,8 @@ #pragma once -#include #include +#include +#include namespace DB diff --git a/src/Databases/DatabaseDictionary.cpp b/src/Databases/DatabaseDictionary.cpp index adb9a659fcd..942ffe6fd62 100644 --- a/src/Databases/DatabaseDictionary.cpp +++ b/src/Databases/DatabaseDictionary.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB @@ -104,7 +105,7 @@ ASTPtr DatabaseDictionary::getCreateTableQueryImpl(const String & table_name, Co return {}; } - auto names_and_types = StorageDictionary::getNamesAndTypes(ExternalDictionariesLoader::getDictionaryStructure(*load_result.config)); + auto names_and_types = StorageDictionary::getNamesAndTypes(ExternalDictionariesLoader::getDictionaryStructure(*load_result.config), false); buffer << "CREATE TABLE " << backQuoteIfNeed(getDatabaseName()) << '.' << backQuoteIfNeed(table_name) << " ("; buffer << names_and_types.toNamesAndTypesDescription(); buffer << ") Engine = Dictionary(" << backQuoteIfNeed(table_name) << ")"; diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 3a4c1423f7c..05a5e057c55 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace fs = std::filesystem; diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index b27a816a60d..31701e665a1 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Databases/DatabaseHDFS.cpp b/src/Databases/DatabaseHDFS.cpp index 060991d1290..eccaae5f22e 100644 --- a/src/Databases/DatabaseHDFS.cpp +++ b/src/Databases/DatabaseHDFS.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index da942cebf8f..ca30ee6db15 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -188,7 +188,14 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); - CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); + LOG_DEBUG(log, "Add info for detached table {} to snapshot.", backQuote(table_name)); + if (snapshot_detached_tables.contains(table_name)) + { + LOG_DEBUG(log, "Clean info about detached table {} from snapshot.", backQuote(table_name)); + snapshot_detached_tables.erase(table_name); + } + + 
CurrentMetrics::add(CurrentMetrics::AttachedTable); } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) @@ -204,8 +211,17 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta if (it->second.expiration_iterator != cache_expiration_queue.end()) cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); + LOG_DEBUG(log, "Add info for detached table {} to snapshot.", backQuote(table_name)); + snapshot_detached_tables.emplace( + table_name, + SnapshotDetachedTable{ + .database = res->getStorageID().database_name, + .table = res->getStorageID().table_name, + .uuid = res->getStorageID().uuid, + .metadata_path = getObjectMetadataPath(table_name), + .is_permanently = false}); - CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::sub(CurrentMetrics::AttachedTable); } return res; } diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index b8154372116..42730cfd53d 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -23,11 +23,13 @@ #include #include #include +#include #include #include #include #include #include +#include namespace fs = std::filesystem; @@ -307,6 +309,16 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri try { FS::createFile(detached_permanently_flag); + + std::lock_guard lock(mutex); + if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + } + else + { + it->second.is_permanently = true; + } } catch (Exception & e) { diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 7d4bb07e8ef..3ab5d3fa697 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #include #include @@ -187,7 +189,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables size_t prev_tables_count = metadata.parsed_tables.size(); size_t prev_total_dictionaries = metadata.total_dictionaries; - auto process_metadata = [&metadata, is_startup, this](const String & file_name) + auto process_metadata = [&metadata, is_startup, local_context, this](const String & file_name) { fs::path path(getMetadataPath()); fs::path file_path(file_name); @@ -195,7 +197,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables try { - auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false); + auto ast = parseQueryFromMetadata(log, local_context, full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false); if (ast) { FunctionNameNormalizer::visit(ast.get()); @@ -224,8 +226,23 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables if (fs::exists(full_path.string() + detached_suffix)) { const std::string table_name = unescapeForFileName(file_name.substr(0, file_name.size() - 4)); - permanently_detached_tables.push_back(table_name); LOG_DEBUG(log, "Skipping permanently detached table {}.", backQuote(table_name)); + + std::lock_guard lock(mutex); + permanently_detached_tables.push_back(table_name); + + const auto detached_table_name = create_query->getTable(); + + snapshot_detached_tables.emplace( + detached_table_name, + 
SnapshotDetachedTable{ + .database = create_query->getDatabase(), + .table = detached_table_name, + .uuid = create_query->uuid, + .metadata_path = getObjectMetadataPath(detached_table_name), + .is_permanently = true}); + + LOG_TRACE(log, "Add permanently detached table {} to system.detached_tables", detached_table_name); return; } @@ -487,6 +504,12 @@ DatabaseTablesIteratorPtr DatabaseOrdinary::getTablesIterator(ContextPtr local_c return DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name, skip_not_loaded); } +DatabaseDetachedTablesSnapshotIteratorPtr DatabaseOrdinary::getDetachedTablesIterator( + ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const +{ + return DatabaseWithOwnTablesBase::getDetachedTablesIterator(local_context, filter_by_table_name, skip_not_loaded); +} + Strings DatabaseOrdinary::getAllTableNames(ContextPtr) const { std::set unique_names; diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h index ef00ac8fdfa..c2c5775e5ab 100644 --- a/src/Databases/DatabaseOrdinary.h +++ b/src/Databases/DatabaseOrdinary.h @@ -57,6 +57,9 @@ public: LoadTaskPtr startupDatabaseAsync(AsyncLoader & async_loader, LoadJobSet startup_after, LoadingStrictnessLevel mode) override; DatabaseTablesIteratorPtr getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; + DatabaseDetachedTablesSnapshotIteratorPtr getDetachedTablesIterator( + ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; + Strings getAllTableNames(ContextPtr context) const override; void alterTable( @@ -64,7 +67,11 @@ public: const StorageID & table_id, const StorageInMemoryMetadata & metadata) override; - Strings getNamesOfPermanentlyDetachedTables() const override { return permanently_detached_tables; } + Strings getNamesOfPermanentlyDetachedTables() const override + { + std::lock_guard lock(mutex); + return permanently_detached_tables; + } protected: virtual void commitAlterTable( @@ -74,7 +81,7 @@ protected: const String & statement, ContextPtr query_context); - Strings permanently_detached_tables; + Strings permanently_detached_tables TSA_GUARDED_BY(mutex); std::unordered_map load_table TSA_GUARDED_BY(mutex); std::unordered_map startup_table TSA_GUARDED_BY(mutex); diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 4ca9afc49eb..4c079ae5300 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include #include @@ -727,81 +729,14 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_ if (auto * create = query->as()) { - bool replicated_table = create->storage && create->storage->engine && - (startsWith(create->storage->engine->name, "Replicated") || startsWith(create->storage->engine->name, "Shared")); - if (!replicated_table || !create->storage->engine->arguments) - return; + if (create->storage) + checkTableEngine(*create, *create->storage, query_context); - ASTs & args_ref = create->storage->engine->arguments->children; - ASTs args = args_ref; - if (args.size() < 2) - return; - - /// It can be a constant expression. Try to evaluate it, ignore exception if we cannot. 
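On the permanently_detached_tables change above: the member is now annotated TSA_GUARDED_BY(mutex) and the getter takes the lock and returns a copy, so callers only ever see a consistent snapshot of the list. A minimal standalone sketch of that copy-under-lock pattern, using plain std::mutex and invented names rather than the ClickHouse classes:

    #include <mutex>
    #include <string>
    #include <vector>

    class DetachedTablesRegistry
    {
    public:
        /// Returning by value copies the container while the lock is held,
        /// so the caller works on an immutable snapshot.
        std::vector<std::string> getNames() const
        {
            std::lock_guard lock(mutex);
            return names;
        }

        void add(std::string name)
        {
            std::lock_guard lock(mutex);
            names.push_back(std::move(name));
        }

    private:
        mutable std::mutex mutex;
        std::vector<std::string> names;   /// guarded by 'mutex' (TSA_GUARDED_BY in ClickHouse)
    };
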
- bool has_expression_argument = args_ref[0]->as() || args_ref[1]->as(); - if (has_expression_argument) + if (create->targets) { - try - { - args[0] = evaluateConstantExpressionAsLiteral(args_ref[0]->clone(), query_context); - args[1] = evaluateConstantExpressionAsLiteral(args_ref[1]->clone(), query_context); - } - catch (...) // NOLINT(bugprone-empty-catch) - { - } + for (const auto & inner_table_engine : create->targets->getInnerEngines()) + checkTableEngine(*create, *inner_table_engine, query_context); } - - ASTLiteral * arg1 = args[0]->as(); - ASTLiteral * arg2 = args[1]->as(); - if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String) - return; - - String maybe_path = arg1->value.get(); - String maybe_replica = arg2->value.get(); - - /// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments. - /// Let's ensure that some macros are used. - /// NOTE: we cannot check here that substituted values will be actually different on shards and replicas. - - Macros::MacroExpansionInfo info; - info.table_id = {getDatabaseName(), create->getTable(), create->uuid}; - info.shard = getShardName(); - info.replica = getReplicaName(); - query_context->getMacros()->expand(maybe_path, info); - bool maybe_shard_macros = info.expanded_other; - info.expanded_other = false; - query_context->getMacros()->expand(maybe_replica, info); - bool maybe_replica_macros = info.expanded_other; - bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros"); - - if (!enable_functional_tests_helper) - { - if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments) - LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments"); - else - throw Exception(ErrorCodes::INCORRECT_QUERY, - "It's not allowed to specify explicit zookeeper_path and replica_name " - "for ReplicatedMergeTree arguments in Replicated database. If you really want to " - "specify them explicitly, enable setting " - "database_replicated_allow_replicated_engine_arguments."); - } - - if (maybe_shard_macros && maybe_replica_macros) - return; - - if (enable_functional_tests_helper && !has_expression_argument) - { - if (maybe_path.empty() || maybe_path.back() != '/') - maybe_path += '/'; - args_ref[0]->as()->value = maybe_path + "auto_{shard}"; - args_ref[1]->as()->value = maybe_replica + "auto_{replica}"; - return; - } - - throw Exception(ErrorCodes::INCORRECT_QUERY, - "Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments. " - "If you really want to specify it explicitly, then you should use some macros " - "to distinguish different shards and replicas"); } } @@ -825,6 +760,85 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_ } } +void DatabaseReplicated::checkTableEngine(const ASTCreateQuery & query, ASTStorage & storage, ContextPtr query_context) const +{ + bool replicated_table = storage.engine && + (startsWith(storage.engine->name, "Replicated") || startsWith(storage.engine->name, "Shared")); + if (!replicated_table || !storage.engine->arguments) + return; + + ASTs & args_ref = storage.engine->arguments->children; + ASTs args = args_ref; + if (args.size() < 2) + return; + + /// It can be a constant expression. Try to evaluate it, ignore exception if we cannot. 
+ bool has_expression_argument = args_ref[0]->as() || args_ref[1]->as(); + if (has_expression_argument) + { + try + { + args[0] = evaluateConstantExpressionAsLiteral(args_ref[0]->clone(), query_context); + args[1] = evaluateConstantExpressionAsLiteral(args_ref[1]->clone(), query_context); + } + catch (...) // NOLINT(bugprone-empty-catch) + { + } + } + + ASTLiteral * arg1 = args[0]->as(); + ASTLiteral * arg2 = args[1]->as(); + if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String) + return; + + String maybe_path = arg1->value.get(); + String maybe_replica = arg2->value.get(); + + /// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments. + /// Let's ensure that some macros are used. + /// NOTE: we cannot check here that substituted values will be actually different on shards and replicas. + + Macros::MacroExpansionInfo info; + info.table_id = {getDatabaseName(), query.getTable(), query.uuid}; + info.shard = getShardName(); + info.replica = getReplicaName(); + query_context->getMacros()->expand(maybe_path, info); + bool maybe_shard_macros = info.expanded_other; + info.expanded_other = false; + query_context->getMacros()->expand(maybe_replica, info); + bool maybe_replica_macros = info.expanded_other; + bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros"); + + if (!enable_functional_tests_helper) + { + if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments) + LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments"); + else + throw Exception(ErrorCodes::INCORRECT_QUERY, + "It's not allowed to specify explicit zookeeper_path and replica_name " + "for ReplicatedMergeTree arguments in Replicated database. If you really want to " + "specify them explicitly, enable setting " + "database_replicated_allow_replicated_engine_arguments."); + } + + if (maybe_shard_macros && maybe_replica_macros) + return; + + if (enable_functional_tests_helper && !has_expression_argument) + { + if (maybe_path.empty() || maybe_path.back() != '/') + maybe_path += '/'; + args_ref[0]->as()->value = maybe_path + "auto_{shard}"; + args_ref[1]->as()->value = maybe_replica + "auto_{replica}"; + return; + } + + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments. 
" + "If you really want to specify it explicitly, then you should use some macros " + "to distinguish different shards and replicas"); +} + BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags) { waitDatabaseStarted(); @@ -1310,11 +1324,9 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node if (create.uuid == UUIDHelpers::Nil || create.getTable() != TABLE_WITH_UUID_NAME_PLACEHOLDER || create.database) throw Exception(ErrorCodes::LOGICAL_ERROR, "Got unexpected query from {}: {}", node_name, query); - bool is_materialized_view_with_inner_table = create.is_materialized_view && create.to_table_id.empty(); - create.setDatabase(getDatabaseName()); create.setTable(unescapeForFileName(node_name)); - create.attach = is_materialized_view_with_inner_table; + create.attach = create.is_materialized_view_with_inner_table(); return ast; } diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index eab5b2ff931..8c3fa7c87f6 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -107,6 +107,7 @@ private: void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config); void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const; + void checkTableEngine(const ASTCreateQuery & query, ASTStorage & storage, ContextPtr query_context) const; void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 & max_log_ptr); diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 31d6f7876a8..1ef88dc03bc 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace fs = std::filesystem; diff --git a/src/Databases/DatabaseS3.cpp b/src/Databases/DatabaseS3.cpp index 1589cc1c75d..2b2d978a846 100644 --- a/src/Databases/DatabaseS3.cpp +++ b/src/Databases/DatabaseS3.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 6426123bb4f..6ccaf811764 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -2,12 +2,9 @@ #include #include -#include -#include -#include -#include #include #include +#include #include #include #include @@ -16,6 +13,10 @@ #include #include #include +#include +#include +#include +#include namespace DB @@ -237,6 +238,24 @@ DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(ContextPt return std::make_unique(std::move(filtered_tables), database_name); } +DatabaseDetachedTablesSnapshotIteratorPtr DatabaseWithOwnTablesBase::getDetachedTablesIterator( + ContextPtr, const FilterByNameFunction & filter_by_table_name, bool /* skip_not_loaded */) const +{ + std::lock_guard lock(mutex); + if (!filter_by_table_name) + return std::make_unique(snapshot_detached_tables); + + SnapshotDetachedTables filtered_detached_tables; + for (const auto & [detached_table_name, snapshot] : snapshot_detached_tables) + if (filter_by_table_name(detached_table_name)) + { + filtered_detached_tables.emplace(detached_table_name, snapshot); + } + + + return std::make_unique(std::move(filtered_detached_tables)); +} + bool DatabaseWithOwnTablesBase::empty() const { std::lock_guard lock(mutex); @@ -251,27 +270,36 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(ContextPtr /* 
context_ */, con StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_name) { - StoragePtr res; - auto it = tables.find(table_name); if (it == tables.end()) throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuote(database_name), backQuote(table_name)); - res = it->second; + + auto table_storage = it->second; + + snapshot_detached_tables.emplace( + table_name, + SnapshotDetachedTable{ + .database = it->second->getStorageID().getDatabaseName(), + .table = table_name, + .uuid = it->second->getStorageID().uuid, + .metadata_path = getObjectMetadataPath(table_name), + .is_permanently = false}); + tables.erase(it); - res->is_detached = true; + table_storage->is_detached = true; - if (res->isSystemStorage() == false) - CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); + if (!table_storage->isSystemStorage() && database_name != DatabaseCatalog::SYSTEM_DATABASE) + CurrentMetrics::sub(getAttachedCounterForStorage(table_storage)); - auto table_id = res->getStorageID(); + auto table_id = table_storage->getStorageID(); if (table_id.hasUUID()) { assert(database_name == DatabaseCatalog::TEMPORARY_DATABASE || getUUID() != UUIDHelpers::Nil); DatabaseCatalog::instance().removeUUIDMapping(table_id.uuid); } - return res; + return table_storage; } void DatabaseWithOwnTablesBase::attachTable(ContextPtr /* context_ */, const String & table_name, const StoragePtr & table, const String &) @@ -300,12 +328,14 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {} already exists.", table_id.getFullTableName()); } + snapshot_detached_tables.erase(table_name); + /// It is important to reset is_detached here since in case of RENAME in /// non-Atomic database the is_detached is set to true before RENAME. 
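On the DatabasesCommon hunks above: detachTableUnlocked now records a SnapshotDetachedTable entry and attachTableUnlocked erases it again, so the detached-tables snapshot mirrors the attached-tables map at all times. The standalone sketch below shows that bookkeeping invariant with two ordinary maps; the types and method names are simplified stand-ins, not the real interfaces.

    #include <map>
    #include <stdexcept>
    #include <string>

    struct DetachedInfo
    {
        std::string metadata_path;
        bool is_permanently = false;
    };

    class TablesWithSnapshot
    {
    public:
        void detach(const std::string & name)
        {
            auto it = attached.find(name);
            if (it == attached.end())
                throw std::runtime_error("Table doesn't exist: " + name);
            detached_snapshot[name] = DetachedInfo{.metadata_path = it->second, .is_permanently = false};
            attached.erase(it);
        }

        void attach(const std::string & name, const std::string & metadata_path)
        {
            detached_snapshot.erase(name);      /// the table is live again
            attached[name] = metadata_path;
        }

    private:
        std::map<std::string, std::string> attached;            /// name -> metadata path
        std::map<std::string, DetachedInfo> detached_snapshot;  /// what system.detached_tables reads
    };
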
table->is_detached = false; - if (table->isSystemStorage() == false && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE) - CurrentMetrics::add(getAttachedCounterForStorage(table), 1); + if (!table->isSystemStorage() && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE) + CurrentMetrics::add(getAttachedCounterForStorage(table)); } void DatabaseWithOwnTablesBase::shutdown() @@ -337,6 +367,7 @@ void DatabaseWithOwnTablesBase::shutdown() std::lock_guard lock(mutex); tables.clear(); + snapshot_detached_tables.clear(); } DatabaseWithOwnTablesBase::~DatabaseWithOwnTablesBase() diff --git a/src/Databases/DatabasesCommon.h b/src/Databases/DatabasesCommon.h index 2eecf8a564f..1ca49e90c23 100644 --- a/src/Databases/DatabasesCommon.h +++ b/src/Databases/DatabasesCommon.h @@ -37,6 +37,9 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; + DatabaseDetachedTablesSnapshotIteratorPtr + getDetachedTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; + std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override; void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr restore_coordination, UInt64 timeout_ms) override; @@ -46,12 +49,13 @@ public: protected: Tables tables TSA_GUARDED_BY(mutex); + SnapshotDetachedTables snapshot_detached_tables TSA_GUARDED_BY(mutex); LoggerPtr log; DatabaseWithOwnTablesBase(const String & name_, const String & logger, ContextPtr context); void attachTableUnlocked(const String & table_name, const StoragePtr & table) TSA_REQUIRES(mutex); - StoragePtr detachTableUnlocked(const String & table_name) TSA_REQUIRES(mutex); + StoragePtr detachTableUnlocked(const String & table_name) TSA_REQUIRES(mutex); StoragePtr getTableUnlocked(const String & table_name) const TSA_REQUIRES(mutex); StoragePtr tryGetTableNoWait(const String & table_name) const; }; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index b00f2fe4baf..f94326d220e 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -5,20 +5,21 @@ #include #include #include +#include #include #include -#include #include +#include #include #include -#include #include #include +#include #include #include +#include #include -#include namespace DB @@ -110,6 +111,57 @@ public: using DatabaseTablesIteratorPtr = std::unique_ptr; +struct SnapshotDetachedTable final +{ + String database; + String table; + UUID uuid = UUIDHelpers::Nil; + String metadata_path; + bool is_permanently{}; +}; + +class DatabaseDetachedTablesSnapshotIterator +{ +private: + SnapshotDetachedTables snapshot; + SnapshotDetachedTables::iterator it; + +protected: + DatabaseDetachedTablesSnapshotIterator(DatabaseDetachedTablesSnapshotIterator && other) noexcept + { + size_t idx = std::distance(other.snapshot.begin(), other.it); + std::swap(snapshot, other.snapshot); + other.it = other.snapshot.end(); + it = snapshot.begin(); + std::advance(it, idx); + } + +public: + explicit DatabaseDetachedTablesSnapshotIterator(const SnapshotDetachedTables & tables_) : snapshot(tables_), it(snapshot.begin()) + { + } + + explicit DatabaseDetachedTablesSnapshotIterator(SnapshotDetachedTables && tables_) : snapshot(std::move(tables_)), it(snapshot.begin()) + { + } + + void next() { ++it; } + + bool isValid() const { return it != 
snapshot.end(); } + + String database() const { return it->second.database; } + + String table() const { return it->second.table; } + + UUID uuid() const { return it->second.uuid; } + + String metadataPath() const { return it->second.metadata_path; } + + bool isPermanently() const { return it->second.is_permanently; } +}; + +using DatabaseDetachedTablesSnapshotIteratorPtr = std::unique_ptr; + /** Database engine. * It is responsible for: @@ -232,6 +284,12 @@ public: /// Wait for all tables to be loaded and started up. If `skip_not_loaded` is true, then not yet loaded or not yet started up (at the moment of iterator creation) tables are excluded. virtual DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}, bool skip_not_loaded = false) const = 0; /// NOLINT + virtual DatabaseDetachedTablesSnapshotIteratorPtr getDetachedTablesIterator( + ContextPtr /*context*/, const FilterByNameFunction & /*filter_by_table_name = {}*/, bool /*skip_not_loaded = false*/) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get detached tables for Database{}", getEngineName()); + } + /// Returns list of table names. virtual Strings getAllTableNames(ContextPtr context) const { diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index bb24373a7e1..7aa29018f4d 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -30,6 +30,7 @@ # include # include # include +# include # include # include # include diff --git a/src/Databases/MySQL/FetchTablesColumnsList.cpp b/src/Databases/MySQL/FetchTablesColumnsList.cpp index e78f4aa2234..0828438cad2 100644 --- a/src/Databases/MySQL/FetchTablesColumnsList.cpp +++ b/src/Databases/MySQL/FetchTablesColumnsList.cpp @@ -2,6 +2,7 @@ #if USE_MYSQL #include +#include #include #include #include diff --git a/src/Databases/MySQL/FetchTablesColumnsList.h b/src/Databases/MySQL/FetchTablesColumnsList.h index 736a0ffd607..ec12cdc73b0 100644 --- a/src/Databases/MySQL/FetchTablesColumnsList.h +++ b/src/Databases/MySQL/FetchTablesColumnsList.h @@ -12,11 +12,12 @@ #include #include -#include namespace DB { +struct Settings; + std::map fetchTablesColumnsList( mysqlxx::PoolWithFailover & pool, const String & database_name, diff --git a/src/Databases/MySQL/MaterializeMetadata.h b/src/Databases/MySQL/MaterializeMetadata.h index e78b7132c8d..36d3fb569d8 100644 --- a/src/Databases/MySQL/MaterializeMetadata.h +++ b/src/Databases/MySQL/MaterializeMetadata.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index e38d09e99b2..6b0548b85c7 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index b22356bebea..a846e23cd4f 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include namespace fs = std::filesystem; diff --git a/src/Databases/SQLite/DatabaseSQLite.cpp b/src/Databases/SQLite/DatabaseSQLite.cpp index e758ea35de5..132a978140c 100644 --- a/src/Databases/SQLite/DatabaseSQLite.cpp +++ 
b/src/Databases/SQLite/DatabaseSQLite.cpp @@ -3,6 +3,7 @@ #if USE_SQLITE #include +#include #include #include #include diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp index c2f2f4a8532..bd9ec57af1f 100644 --- a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -58,15 +58,6 @@ std::optional tryGetAttributeUnderlyingType(TypeIndex i } - -DictionarySpecialAttribute::DictionarySpecialAttribute(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) - : name{config.getString(config_prefix + ".name", "")}, expression{config.getString(config_prefix + ".expression", "")} -{ - if (name.empty() && !expression.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Element {}.name is empty", config_prefix); -} - - DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) { std::string structure_prefix = config_prefix + ".structure"; @@ -79,7 +70,8 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration if (has_id) { - id.emplace(config, structure_prefix + ".id"); + static constexpr auto id_default_type = "UInt64"; + id.emplace(makeDictionaryTypedSpecialAttribute(config, structure_prefix + ".id", id_default_type)); } else if (has_key) { diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h index 55060b1592f..0d44b696d74 100644 --- a/src/Dictionaries/DictionaryStructure.h +++ b/src/Dictionaries/DictionaryStructure.h @@ -89,14 +89,6 @@ constexpr void callOnDictionaryAttributeType(AttributeUnderlyingType type, F && }); } -struct DictionarySpecialAttribute final -{ - const std::string name; - const std::string expression; - - DictionarySpecialAttribute(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); -}; - struct DictionaryTypedSpecialAttribute final { const std::string name; @@ -108,7 +100,7 @@ struct DictionaryTypedSpecialAttribute final /// Name of identifier plus list of attributes struct DictionaryStructure final { - std::optional id; + std::optional id; std::optional> key; std::vector attributes; std::unordered_map attribute_name_to_index; diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index 2c0f7653aff..f3b32402c20 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -1,6 +1,7 @@ #include "DirectDictionary.h" #include +#include #include #include diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index d8111afdc19..217a67453e4 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -8,6 +8,8 @@ #include #include +#include + #include #include #include diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index dae8ec06d30..663c63dd6c6 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -1,12 +1,13 @@ #include "HTTPDictionarySource.h" +#include #include #include #include #include #include #include -#include #include +#include #include #include #include diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 9b194acf87f..d7d50dfb0a6 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -2,6 +2,7 
@@ #include #include +#include #include #include #include diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index 206516edab0..41fafcc162b 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -1,11 +1,11 @@ #include "IPAddressDictionary.h" -#include -#include + #include #include #include #include #include +#include #include #include #include @@ -23,6 +23,9 @@ #include #include +#include +#include + namespace DB { diff --git a/src/Dictionaries/PolygonDictionaryImplementations.cpp b/src/Dictionaries/PolygonDictionaryImplementations.cpp index e21fadb0e7e..84abeed2731 100644 --- a/src/Dictionaries/PolygonDictionaryImplementations.cpp +++ b/src/Dictionaries/PolygonDictionaryImplementations.cpp @@ -8,6 +8,7 @@ #include #include +#include #include diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index b35e14577a8..f62a9a009d8 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -2,6 +2,7 @@ #include #include +#include #include "DictionarySourceFactory.h" #include "registerDictionaries.h" diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 830f9d09364..2522de60183 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -1002,9 +1003,9 @@ void registerDictionaryRegExpTree(DictionaryFactory & factory) dict_struct, std::move(source_ptr), configuration, - context->getSettings().regexp_dict_allow_hyperscan, - context->getSettings().regexp_dict_flag_case_insensitive, - context->getSettings().regexp_dict_flag_dotall); + context->getSettingsRef().regexp_dict_allow_hyperscan, + context->getSettingsRef().regexp_dict_flag_case_insensitive, + context->getSettingsRef().regexp_dict_flag_dotall); }; factory.registerLayout("regexp_tree", create_layout, true); diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 1ebfc4a29b0..590d000f16e 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -10,11 +10,13 @@ #include #include #include +#include #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" #include "readInvalidateQuery.h" #include "registerDictionaries.h" #include +#include #include #include #include "config.h" @@ -239,7 +241,7 @@ void registerDictionarySourceXDBC(DictionarySourceFactory & factory) #if USE_ODBC BridgeHelperPtr bridge = std::make_shared>( global_context, - global_context->getSettings().http_receive_timeout, + global_context->getSettingsRef().http_receive_timeout, config.getString(config_prefix + ".odbc.connection_string"), config.getBool(config_prefix + ".settings.odbc_bridge_use_connection_pooling", global_context->getSettingsRef().odbc_bridge_use_connection_pooling)); diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 9ee2027afc7..4ec2e1f5260 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -382,6 +382,15 @@ void buildPrimaryKeyConfiguration( name_element->appendChild(name); buildAttributeExpressionIfNeeded(doc, id_element, dict_attr); + + if (!dict_attr->type) + return; + + AutoPtr 
type_element(doc->createElement("type")); + id_element->appendChild(type_element); + + AutoPtr type(doc->createTextNode(queryToString(dict_attr->type))); + type_element->appendChild(type); } else { diff --git a/src/Dictionaries/registerCacheDictionaries.cpp b/src/Dictionaries/registerCacheDictionaries.cpp index b79261955ff..f2d027575f5 100644 --- a/src/Dictionaries/registerCacheDictionaries.cpp +++ b/src/Dictionaries/registerCacheDictionaries.cpp @@ -2,6 +2,7 @@ #include "CacheDictionaryStorage.h" #include "SSDCacheDictionaryStorage.h" #include +#include #include #include diff --git a/src/Dictionaries/registerHashedDictionary.cpp b/src/Dictionaries/registerHashedDictionary.cpp index 5fc4f5d5cb6..068b71170cc 100644 --- a/src/Dictionaries/registerHashedDictionary.cpp +++ b/src/Dictionaries/registerHashedDictionary.cpp @@ -1,8 +1,8 @@ +#include #include #include #include #include - #include namespace DB diff --git a/src/Dictionaries/registerRangeHashedDictionary.cpp b/src/Dictionaries/registerRangeHashedDictionary.cpp index 8123b811198..bc142a6bddc 100644 --- a/src/Dictionaries/registerRangeHashedDictionary.cpp +++ b/src/Dictionaries/registerRangeHashedDictionary.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index e9c642666d3..198f6c0ea04 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -59,7 +59,7 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile( std::optional read_until_position_, std::shared_ptr cache_log_) : ReadBufferFromFileBase(use_external_buffer_ ? 0 : settings_.remote_fs_buffer_size, nullptr, 0, file_size_) -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD , log(getLogger(fmt::format("CachedOnDiskReadBufferFromFile({})", cache_key_))) #else , log(getLogger("CachedOnDiskReadBufferFromFile")) @@ -452,7 +452,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme { case ReadType::CACHED: { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD size_t file_size = getFileSizeFromReadBuffer(*read_buffer_for_file_segment); if (file_size == 0 || range.left + file_size <= file_offset_of_buffer_end) throw Exception( @@ -937,7 +937,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() if (!result) { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD if (read_type == ReadType::CACHED) { size_t cache_file_size = getFileSizeFromReadBuffer(*implementation_buffer); diff --git a/src/Disks/IO/ReadBufferFromWebServer.cpp b/src/Disks/IO/ReadBufferFromWebServer.cpp index 7c5de8a13de..837452aa9f2 100644 --- a/src/Disks/IO/ReadBufferFromWebServer.cpp +++ b/src/Disks/IO/ReadBufferFromWebServer.cpp @@ -1,9 +1,12 @@ #include "ReadBufferFromWebServer.h" -#include +#include +#include +#include #include #include -#include +#include + #include diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp index d9dfedadd48..0aa3b9c40b5 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h index 19ba48ea225..bb2f0270924 100644 
--- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h @@ -11,7 +11,6 @@ #include #include -#include #include #include #include @@ -19,6 +18,8 @@ namespace DB { +struct Settings; + namespace AzureBlobStorage { diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index d9535358daf..d42da6dc9c4 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index a6781e0ab35..a6672e14e10 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 62df98f51e6..de78ce4594d 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -1,14 +1,14 @@ #include -#include -#include #if USE_AWS_S3 +#include #include #include #include #include #include +#include #include #include #include @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index e837e056acc..7f7a3fe1a62 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include #include #include diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 3d92023318e..1271cdfb7ad 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -175,7 +176,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.parquet.write_page_index = settings.output_format_parquet_write_page_index; format_settings.parquet.local_read_min_bytes_for_seek = settings.input_format_parquet_local_file_min_bytes_for_seek; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? 
FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; - format_settings.pretty.color = settings.output_format_pretty_color; + format_settings.pretty.color = settings.output_format_pretty_color.valueOr(2); format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width; format_settings.pretty.max_rows = settings.output_format_pretty_max_rows; format_settings.pretty.max_value_width = settings.output_format_pretty_max_value_width; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 8b38ffba524..a736bb1a558 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -1,10 +1,9 @@ #pragma once -#include #include +#include #include #include -#include namespace DB { @@ -293,7 +292,8 @@ struct FormatSettings UInt64 max_value_width = 10000; UInt64 max_value_width_apply_for_single_value = false; bool highlight_digit_groups = true; - SettingFieldUInt64Auto color{"auto"}; + /// Set to 2 for auto + UInt64 color = 2; bool output_format_pretty_row_numbers = false; UInt64 output_format_pretty_single_large_number_tip_threshold = 1'000'000; diff --git a/src/Formats/NumpyDataTypes.h b/src/Formats/NumpyDataTypes.h index 062f743c0ea..6ccdf65f457 100644 --- a/src/Formats/NumpyDataTypes.h +++ b/src/Formats/NumpyDataTypes.h @@ -2,6 +2,7 @@ #include #include #include +#include namespace ErrorCodes { diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 735b536986d..617595c19a2 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Functions/CRC.cpp b/src/Functions/CRC.cpp index 49d6dd6fa52..0ba976669a3 100644 --- a/src/Functions/CRC.cpp +++ b/src/Functions/CRC.cpp @@ -150,9 +150,9 @@ using FunctionCRC64ECMA = FunctionCRC; REGISTER_FUNCTION(CRC) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); - factory.registerFunction({}, FunctionFactory::CaseInsensitive); - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index 0f54ff52ba2..49f63073aaf 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -136,10 +137,10 @@ FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, st REGISTER_FUNCTION(CastOverloadResolvers) { - factory.registerFunction("_CAST", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::nonAccurate, true, {}); }, {}, FunctionFactory::CaseInsensitive); + factory.registerFunction("_CAST", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::nonAccurate, true, {}); }, {}, FunctionFactory::Case::Insensitive); /// Note: "internal" (not affected by null preserving setting) versions of accurate cast functions are unneeded. 
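On the FormatSettings change above: pretty.color is now a plain UInt64 tri-state instead of SettingFieldUInt64Auto, with 0 = off, 1 = on and 2 = auto (hence the valueOr(2) default). How "auto" is resolved is up to the consumer; a minimal sketch of one plausible resolution, with the terminal check supplied by the caller so the snippet stays portable:

    #include <cstdint>

    /// 0 = off, 1 = on, 2 = auto (the convention the diff introduces).
    bool shouldUseColor(uint64_t color_setting, bool stdout_is_terminal)
    {
        switch (color_setting)
        {
            case 0:  return false;               /// explicitly disabled
            case 1:  return true;                /// explicitly enabled
            default: return stdout_is_terminal;  /// 2 (auto): follow the output device
        }
    }
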
- factory.registerFunction("CAST", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::nonAccurate, false, {}); }, {}, FunctionFactory::CaseInsensitive); + factory.registerFunction("CAST", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::nonAccurate, false, {}); }, {}, FunctionFactory::Case::Insensitive); factory.registerFunction("accurateCast", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::accurate, false, {}); }, {}); factory.registerFunction("accurateCastOrNull", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::accurateOrNull, false, {}); }, {}); } diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 34c59ecab08..5f745f3ccad 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1954,7 +1954,10 @@ struct ToRelativeSubsecondNumImpl return t.value; if (scale > scale_multiplier) return t.value / (scale / scale_multiplier); - return t.value * (scale_multiplier / scale); + return static_cast(t.value) * static_cast((scale_multiplier / scale)); + /// Casting ^^: All integers are Int64, yet if t.value is big enough the multiplication can still + /// overflow which is UB. This place is too low-level and generic to check if t.value is sane. + /// Therefore just let it overflow safely and don't bother further. } static Int64 execute(UInt32 t, const DateLUTImpl &) { diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 5d19ba44d9b..ee4e28a7858 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -1703,7 +1703,7 @@ public: { if constexpr (is_division) { - if (context->getSettingsRef().decimal_check_overflow) + if (decimalCheckArithmeticOverflow(context)) { /// Check overflow by using operands scale (based on big decimal division implementation details): /// big decimal arithmetic is based on big integers, decimal operands are converted to big integers diff --git a/src/Functions/FunctionChar.cpp b/src/Functions/FunctionChar.cpp index 055eb08f0c7..0ebe1442f08 100644 --- a/src/Functions/FunctionChar.cpp +++ b/src/Functions/FunctionChar.cpp @@ -116,7 +116,7 @@ private: REGISTER_FUNCTION(Char) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/FunctionCustomWeekToDateOrDate32.h b/src/Functions/FunctionCustomWeekToDateOrDate32.h index c904bfaec30..6ae8c847b65 100644 --- a/src/Functions/FunctionCustomWeekToDateOrDate32.h +++ b/src/Functions/FunctionCustomWeekToDateOrDate32.h @@ -1,4 +1,5 @@ #pragma once +#include #include namespace DB diff --git a/src/Functions/FunctionDateOrDateTimeToDateOrDate32.h b/src/Functions/FunctionDateOrDateTimeToDateOrDate32.h index 6eb3e534b62..28caef36879 100644 --- a/src/Functions/FunctionDateOrDateTimeToDateOrDate32.h +++ b/src/Functions/FunctionDateOrDateTimeToDateOrDate32.h @@ -1,4 +1,5 @@ #pragma once +#include #include namespace DB diff --git a/src/Functions/FunctionDateOrDateTimeToDateTimeOrDateTime64.h b/src/Functions/FunctionDateOrDateTimeToDateTimeOrDateTime64.h index 9f1066fd687..d5a1f28b7cd 100644 --- a/src/Functions/FunctionDateOrDateTimeToDateTimeOrDateTime64.h +++ b/src/Functions/FunctionDateOrDateTimeToDateTimeOrDateTime64.h @@ -1,4 +1,6 @@ #pragma once + +#include #include namespace DB diff --git a/src/Functions/FunctionFQDN.cpp 
b/src/Functions/FunctionFQDN.cpp index 108a96216fd..8948c948265 100644 --- a/src/Functions/FunctionFQDN.cpp +++ b/src/Functions/FunctionFQDN.cpp @@ -46,7 +46,7 @@ public: REGISTER_FUNCTION(FQDN) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); factory.registerAlias("fullHostName", "FQDN"); } diff --git a/src/Functions/FunctionFactory.cpp b/src/Functions/FunctionFactory.cpp index 004ef745a93..501cf6e725c 100644 --- a/src/Functions/FunctionFactory.cpp +++ b/src/Functions/FunctionFactory.cpp @@ -4,6 +4,7 @@ #include #include +#include #include @@ -30,7 +31,7 @@ void FunctionFactory::registerFunction( const std::string & name, FunctionCreator creator, FunctionDocumentation doc, - CaseSensitiveness case_sensitiveness) + Case case_sensitiveness) { if (!functions.emplace(name, FunctionFactoryData{creator, doc}).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "FunctionFactory: the function name '{}' is not unique", name); @@ -40,7 +41,7 @@ void FunctionFactory::registerFunction( throw Exception(ErrorCodes::LOGICAL_ERROR, "FunctionFactory: the function name '{}' is already registered as alias", name); - if (case_sensitiveness == CaseInsensitive) + if (case_sensitiveness == Case::Insensitive) { if (!case_insensitive_functions.emplace(function_name_lowercase, FunctionFactoryData{creator, doc}).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "FunctionFactory: the case insensitive function name '{}' is not unique", @@ -53,7 +54,7 @@ void FunctionFactory::registerFunction( const std::string & name, FunctionSimpleCreator creator, FunctionDocumentation doc, - CaseSensitiveness case_sensitiveness) + Case case_sensitiveness) { registerFunction(name, [my_creator = std::move(creator)](ContextPtr context) { diff --git a/src/Functions/FunctionFactory.h b/src/Functions/FunctionFactory.h index bb43d4719b8..d05e84439be 100644 --- a/src/Functions/FunctionFactory.h +++ b/src/Functions/FunctionFactory.h @@ -30,7 +30,7 @@ public: static FunctionFactory & instance(); template - void registerFunction(FunctionDocumentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive) + void registerFunction(FunctionDocumentation doc = {}, Case case_sensitiveness = Case::Sensitive) { registerFunction(Function::name, std::move(doc), case_sensitiveness); } @@ -56,13 +56,13 @@ public: const std::string & name, FunctionCreator creator, FunctionDocumentation doc = {}, - CaseSensitiveness case_sensitiveness = CaseSensitive); + Case case_sensitiveness = Case::Sensitive); void registerFunction( const std::string & name, FunctionSimpleCreator creator, FunctionDocumentation doc = {}, - CaseSensitiveness case_sensitiveness = CaseSensitive); + Case case_sensitiveness = Case::Sensitive); FunctionDocumentation getDocumentation(const std::string & name) const; @@ -79,7 +79,7 @@ private: String getFactoryName() const override { return "FunctionFactory"; } template - void registerFunction(const std::string & name, FunctionDocumentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive) + void registerFunction(const std::string & name, FunctionDocumentation doc = {}, Case case_sensitiveness = Case::Sensitive) { registerFunction(name, &Function::create, std::move(doc), case_sensitiveness); } diff --git a/src/Functions/FunctionGenerateRandomStructure.cpp b/src/Functions/FunctionGenerateRandomStructure.cpp index 6dc68134502..2bead8737fd 100644 --- a/src/Functions/FunctionGenerateRandomStructure.cpp +++ 
b/src/Functions/FunctionGenerateRandomStructure.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -354,6 +355,11 @@ namespace } } + FunctionPtr FunctionGenerateRandomStructure::create(DB::ContextPtr context) + { + return std::make_shared(context->getSettingsRef().allow_suspicious_low_cardinality_types.value); + } + DataTypePtr FunctionGenerateRandomStructure::getReturnTypeImpl(const DataTypes & arguments) const { if (arguments.size() > 2) @@ -439,8 +445,7 @@ The function returns a value of type String. {"with specified seed", "SELECT generateRandomStructure(1, 42)", "c1 UInt128"}, }, .categories{"Random"} - }, - FunctionFactory::CaseSensitive); + }); } } diff --git a/src/Functions/FunctionGenerateRandomStructure.h b/src/Functions/FunctionGenerateRandomStructure.h index 894096a6e07..5901565696f 100644 --- a/src/Functions/FunctionGenerateRandomStructure.h +++ b/src/Functions/FunctionGenerateRandomStructure.h @@ -17,10 +17,7 @@ public: { } - static FunctionPtr create(ContextPtr context) - { - return std::make_shared(context->getSettingsRef().allow_suspicious_low_cardinality_types.value); - } + static FunctionPtr create(ContextPtr context); String getName() const override { return name; } diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp index 0a2859fe864..a67f3a977db 100644 --- a/src/Functions/FunctionJoinGet.cpp +++ b/src/Functions/FunctionJoinGet.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Functions/FunctionTokens.h b/src/Functions/FunctionTokens.h index 0ca47126198..f1435ca5651 100644 --- a/src/Functions/FunctionTokens.h +++ b/src/Functions/FunctionTokens.h @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/FunctionUnixTimestamp64.cpp b/src/Functions/FunctionUnixTimestamp64.cpp new file mode 100644 index 00000000000..a22c035a0ba --- /dev/null +++ b/src/Functions/FunctionUnixTimestamp64.cpp @@ -0,0 +1,13 @@ +#include +#include + +namespace DB +{ + +FunctionFromUnixTimestamp64::FunctionFromUnixTimestamp64(size_t target_scale_, const char * name_, ContextPtr context) + : target_scale(target_scale_) + , name(name_) + , allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) +{} + +} diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index e282bcfbfe2..bbd7e586a67 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -107,11 +107,7 @@ private: const bool allow_nonconst_timezone_arguments; public: - FunctionFromUnixTimestamp64(size_t target_scale_, const char * name_, ContextPtr context) - : target_scale(target_scale_) - , name(name_) - , allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) - {} + FunctionFromUnixTimestamp64(size_t target_scale_, const char * name_, ContextPtr context); String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } diff --git a/src/Functions/FunctionsBinaryRepresentation.cpp b/src/Functions/FunctionsBinaryRepresentation.cpp index ab10d402df4..f77d2f1f350 100644 --- a/src/Functions/FunctionsBinaryRepresentation.cpp +++ b/src/Functions/FunctionsBinaryRepresentation.cpp @@ -728,10 +728,10 @@ public: REGISTER_FUNCTION(BinaryRepr) { - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); - factory.registerFunction>({}, 
FunctionFactory::CaseInsensitive); - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); + factory.registerFunction>({}, FunctionFactory::Case::Insensitive); + factory.registerFunction>({}, FunctionFactory::Case::Insensitive); + factory.registerFunction>({}, FunctionFactory::Case::Insensitive); + factory.registerFunction>({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/FunctionsCodingIP.cpp b/src/Functions/FunctionsCodingIP.cpp index e01967274f4..0a97d029f84 100644 --- a/src/Functions/FunctionsCodingIP.cpp +++ b/src/Functions/FunctionsCodingIP.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -1168,10 +1169,10 @@ REGISTER_FUNCTION(Coding) factory.registerFunction>(); /// MySQL compatibility aliases: - factory.registerAlias("INET_ATON", FunctionIPv4StringToNum::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("INET6_NTOA", FunctionIPv6NumToString::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("INET6_ATON", FunctionIPv6StringToNum::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("INET_NTOA", NameFunctionIPv4NumToString::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("INET_ATON", FunctionIPv4StringToNum::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("INET6_NTOA", FunctionIPv6NumToString::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("INET6_ATON", FunctionIPv6StringToNum::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("INET_NTOA", NameFunctionIPv4NumToString::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/FunctionsCodingULID.cpp b/src/Functions/FunctionsCodingULID.cpp index ff040945a15..b67224a5625 100644 --- a/src/Functions/FunctionsCodingULID.cpp +++ b/src/Functions/FunctionsCodingULID.cpp @@ -180,8 +180,7 @@ An optional second argument can be passed to specify a timezone for the timestam {"ulid", "SELECT ULIDStringToDateTime(generateULID())", ""}, {"timezone", "SELECT ULIDStringToDateTime(generateULID(), 'Asia/Istanbul')", ""}}, .categories{"ULID"} - }, - FunctionFactory::CaseSensitive); + }); } } diff --git a/src/Functions/FunctionsCodingUUID.cpp b/src/Functions/FunctionsCodingUUID.cpp index 6a44f4263a8..83fdcbc4af9 100644 --- a/src/Functions/FunctionsCodingUUID.cpp +++ b/src/Functions/FunctionsCodingUUID.cpp @@ -496,8 +496,8 @@ This function accepts a UUID and returns a FixedString(16) as its binary represe │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ a/<@];!~p{jTj={) │ @( FunctionDocumentation{ @@ -509,8 +509,7 @@ An optional second argument can be passed to specify a timezone for the timestam .examples{ {"uuid","select UUIDv7ToDateTime(generateUUIDv7())", ""}, {"uuid","select generateUUIDv7() as uuid, UUIDv7ToDateTime(uuid), UUIDv7ToDateTime(uuid, 'America/New_York')", ""}}, - .categories{"UUID"}}, - FunctionFactory::CaseSensitive); + .categories{"UUID"}}); } } diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index f3e54d2fbd9..675283d011e 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -5223,7 +5224,7 @@ REGISTER_FUNCTION(Conversion) /// MySQL compatibility alias. Cannot be registered as alias, /// because we don't want it to be normalized to toDate in queries, /// otherwise CREATE DICTIONARY query breaks. 
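On the comment just above about registering "DATE" with a creator instead of registerAlias: aliases get rewritten to their canonical target when a query is normalized, while a separately registered name keeps its spelling, which is what CREATE DICTIONARY relies on here. A toy registry illustrating only that distinction (not the real FunctionFactory API):

    #include <functional>
    #include <map>
    #include <string>

    struct ToyFunctionRegistry
    {
        std::map<std::string, std::function<int()>> functions;   /// registered names keep their spelling
        std::map<std::string, std::string> aliases;               /// aliases collapse to the target name

        std::string normalize(const std::string & name) const
        {
            auto it = aliases.find(name);
            return it == aliases.end() ? name : it->second;
        }
    };

With such a registry, an alias like TIMESTAMP -> DateTime would normalize to "DateTime", whereas a name registered with its own creator, like "DATE" above, would survive normalization unchanged.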
- factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::CaseInsensitive); + factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::Case::Insensitive); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 27717ea3611..95c54ac9528 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -1184,7 +1184,7 @@ private: if (icolumn->size() != vec_to.size()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}", - icolumn->getName(), icolumn->size(), vec_to.size(), getName()); + icolumn->getName(), icolumn->size(), vec_to.size(), getName()); if constexpr (Keyed) if (key_cols.size() != vec_to.size() && key_cols.size() != 1) @@ -1223,6 +1223,9 @@ private: else executeGeneric(key_cols, icolumn, vec_to); } + /// Return a fixed random-looking magic number when input is empty. + static constexpr auto filler = 0xe28dbde7fe22e41c; + void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first) const { /// Flattening of tuples. @@ -1231,6 +1234,11 @@ private: const auto & tuple_columns = tuple->getColumns(); const DataTypes & tuple_types = typeid_cast(*type).getElements(); size_t tuple_size = tuple_columns.size(); + + if (0 == tuple_size && is_first) + for (auto & hash : vec_to) + hash = static_cast(filler); + for (size_t i = 0; i < tuple_size; ++i) executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first); } @@ -1239,6 +1247,11 @@ private: const auto & tuple_columns = tuple_const->getColumns(); const DataTypes & tuple_types = typeid_cast(*type).getElements(); size_t tuple_size = tuple_columns.size(); + + if (0 == tuple_size && is_first) + for (auto & hash : vec_to) + hash = static_cast(filler); + for (size_t i = 0; i < tuple_size; ++i) { auto tmp = ColumnConst::create(tuple_columns[i], column->size()); @@ -1300,10 +1313,7 @@ public: constexpr size_t first_data_argument = Keyed; if (arguments.size() <= first_data_argument) - { - /// Return a fixed random-looking magic number when input is empty - vec_to.assign(input_rows_count, static_cast(0xe28dbde7fe22e41c)); - } + vec_to.assign(input_rows_count, static_cast(filler)); KeyColumnsType key_cols{}; if constexpr (Keyed) diff --git a/src/Functions/FunctionsHashingMisc.cpp b/src/Functions/FunctionsHashingMisc.cpp index 38f16af0e6d..5cc29215fe3 100644 --- a/src/Functions/FunctionsHashingMisc.cpp +++ b/src/Functions/FunctionsHashingMisc.cpp @@ -41,8 +41,7 @@ REGISTER_FUNCTION(Hashing) .description="Calculates value of XXH3 64-bit hash function. 
Refer to https://github.com/Cyan4973/xxHash for detailed documentation.", .examples{{"hash", "SELECT xxh3('ClickHouse')", ""}}, .categories{"Hash"} - }, - FunctionFactory::CaseSensitive); + }); factory.registerFunction(); diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index db1602b1939..47040545677 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 2f5ce6deebf..e1814150da6 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -29,7 +29,7 @@ REGISTER_FUNCTION(Logical) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); - factory.registerFunction({}, FunctionFactory::CaseInsensitive); /// Operator NOT(x) can be parsed as a function. + factory.registerFunction({}, FunctionFactory::Case::Insensitive); /// Operator NOT(x) can be parsed as a function. } namespace ErrorCodes diff --git a/src/Functions/FunctionsMultiStringFuzzySearch.h b/src/Functions/FunctionsMultiStringFuzzySearch.h index 18b411e9839..a92a6570279 100644 --- a/src/Functions/FunctionsMultiStringFuzzySearch.h +++ b/src/Functions/FunctionsMultiStringFuzzySearch.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Functions/FunctionsMultiStringSearch.h b/src/Functions/FunctionsMultiStringSearch.h index c0ed90aa042..03db2651fd0 100644 --- a/src/Functions/FunctionsMultiStringSearch.h +++ b/src/Functions/FunctionsMultiStringSearch.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/FunctionsOpDate.cpp b/src/Functions/FunctionsOpDate.cpp index 7355848f73f..c4b154736e0 100644 --- a/src/Functions/FunctionsOpDate.cpp +++ b/src/Functions/FunctionsOpDate.cpp @@ -99,8 +99,8 @@ using FunctionSubDate = FunctionOpDate; REGISTER_FUNCTION(AddInterval) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/FunctionsRound.cpp b/src/Functions/FunctionsRound.cpp index 059476acb40..d87a9e7ca43 100644 --- a/src/Functions/FunctionsRound.cpp +++ b/src/Functions/FunctionsRound.cpp @@ -7,16 +7,16 @@ namespace DB REGISTER_FUNCTION(Round) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); - factory.registerFunction({}, FunctionFactory::CaseSensitive); - factory.registerFunction({}, FunctionFactory::CaseInsensitive); - factory.registerFunction({}, FunctionFactory::CaseInsensitive); - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); + factory.registerFunction({}, FunctionFactory::Case::Sensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); factory.registerFunction(); /// Compatibility aliases. 
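Editor's note: the FunctionsHashing.h hunk above hoists the magic value returned for an empty argument list into a named constexpr `filler` and reuses it when the first argument is an empty tuple, so hashing no data always yields the same deterministic value. Below is a minimal standalone sketch of that idea; the names and the combine step are illustrative only and are not ClickHouse's actual hash implementation.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

/// Fixed random-looking value used when there is nothing to hash (same constant as in the patch).
static constexpr std::uint64_t filler = 0xe28dbde7fe22e41cULL;

/// Illustrative combine step, not the real ClickHouse hash function.
static std::uint64_t combine(std::uint64_t seed, std::uint64_t value)
{
    seed ^= value + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2);
    return seed;
}

/// Hash one "row" consisting of zero or more already-hashed argument values.
static std::uint64_t hashRow(const std::vector<std::uint64_t> & argument_hashes)
{
    if (argument_hashes.empty())
        return filler; /// empty input (or an empty tuple as first argument) still yields a stable value

    std::uint64_t result = argument_hashes.front(); /// first argument sets the hash
    for (std::size_t i = 1; i < argument_hashes.size(); ++i)
        result = combine(result, argument_hashes[i]);
    return result;
}

int main()
{
    std::cout << std::hex;
    std::cout << hashRow({}) << '\n';        /// always the filler constant
    std::cout << hashRow({1, 2, 3}) << '\n'; /// deterministic combination of the arguments
}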
- factory.registerAlias("ceiling", "ceil", FunctionFactory::CaseInsensitive); - factory.registerAlias("truncate", "trunc", FunctionFactory::CaseInsensitive); + factory.registerAlias("ceiling", "ceil", FunctionFactory::Case::Insensitive); + factory.registerAlias("truncate", "trunc", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/FunctionsStringHashFixedString.cpp b/src/Functions/FunctionsStringHashFixedString.cpp index e3b1b82c92f..01e989a7f2c 100644 --- a/src/Functions/FunctionsStringHashFixedString.cpp +++ b/src/Functions/FunctionsStringHashFixedString.cpp @@ -428,8 +428,7 @@ REGISTER_FUNCTION(HashFixedStrings) It returns a BLAKE3 hash as a byte array with type FixedString(32). )", .examples{{"hash", "SELECT hex(BLAKE3('ABC'))", ""}}, - .categories{"Hash"}}, - FunctionFactory::CaseSensitive); + .categories{"Hash"}}); # endif } #endif diff --git a/src/Functions/FunctionsStringSearch.h b/src/Functions/FunctionsStringSearch.h index 924a66c1130..fba6336ebff 100644 --- a/src/Functions/FunctionsStringSearch.h +++ b/src/Functions/FunctionsStringSearch.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/FunctionsTextClassification.h b/src/Functions/FunctionsTextClassification.h index 8e0f236366d..90e8af06ccc 100644 --- a/src/Functions/FunctionsTextClassification.h +++ b/src/Functions/FunctionsTextClassification.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 31695fc95d5..8b092ba9b6e 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -47,54 +47,85 @@ bool allArgumentsAreConstants(const ColumnsWithTypeAndName & args) return true; } +/// Replaces single low cardinality column in a function call by its dictionary +/// This can only happen after the arguments have been adapted in IFunctionOverloadResolver::getReturnType +/// as it's only possible if there is one low cardinality column and, optionally, const columns ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( ColumnsWithTypeAndName & args, bool can_be_executed_on_default_arguments, size_t input_rows_count) { - size_t num_rows = input_rows_count; + /// We return the LC indexes so the LC can be reconstructed with the function result ColumnPtr indexes; - /// Find first LowCardinality column and replace it to nested dictionary. - for (auto & column : args) + size_t number_low_cardinality_columns = 0; + size_t last_low_cardinality = 0; + size_t number_const_columns = 0; + size_t number_full_columns = 0; + + for (size_t i = 0; i < args.size(); i++) { - if (const auto * low_cardinality_column = checkAndGetColumn(column.column.get())) + auto const & arg = args[i]; + if (checkAndGetColumn(arg.column.get())) { - /// Single LowCardinality column is supported now. - if (indexes) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single dictionary argument for function."); - - const auto * low_cardinality_type = checkAndGetDataType(column.type.get()); - - if (!low_cardinality_type) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Incompatible type for LowCardinality column: {}", - column.type->getName()); - - if (can_be_executed_on_default_arguments) - { - /// Normal case, when function can be executed on values' default. - column.column = low_cardinality_column->getDictionary().getNestedColumn(); - indexes = low_cardinality_column->getIndexesPtr(); - } - else - { - /// Special case when default value can't be used. 
Example: 1 % LowCardinality(Int). - /// LowCardinality always contains default, so 1 % 0 will throw exception in normal case. - auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size()); - column.column = dict_encoded.dictionary; - indexes = dict_encoded.indexes; - } - - num_rows = column.column->size(); - column.type = low_cardinality_type->getDictionaryType(); + number_low_cardinality_columns++; + last_low_cardinality = i; } + else if (checkAndGetColumn(arg.column.get())) + number_const_columns++; + else + number_full_columns++; } - /// Change size of constants. + if (!number_low_cardinality_columns && !number_const_columns) + return nullptr; + + if (number_full_columns > 0 || number_low_cardinality_columns > 1) + { + /// This should not be possible but currently there are multiple tests in CI failing because of it + /// TODO: Fix those cases, then enable this exception +#if 0 + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected low cardinality types found. Low cardinality: {}. Full {}. Const {}", + number_low_cardinality_columns, number_full_columns, number_const_columns); +#else + return nullptr; +#endif + } + else if (number_low_cardinality_columns == 1) + { + auto & lc_arg = args[last_low_cardinality]; + + const auto * low_cardinality_type = checkAndGetDataType(lc_arg.type.get()); + if (!low_cardinality_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Incompatible type for LowCardinality column: {}", lc_arg.type->getName()); + + const auto * low_cardinality_column = checkAndGetColumn(lc_arg.column.get()); + chassert(low_cardinality_column); + + if (can_be_executed_on_default_arguments) + { + /// Normal case, when function can be executed on values' default. + lc_arg.column = low_cardinality_column->getDictionary().getNestedColumn(); + indexes = low_cardinality_column->getIndexesPtr(); + } + else + { + /// Special case when default value can't be used. Example: 1 % LowCardinality(Int). + /// LowCardinality always contains default, so 1 % 0 will throw exception in normal case. 
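Editor's note: the rewritten replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes above executes the function only on the dictionary of the single LowCardinality argument and keeps the index column so the full-size result can be reconstructed afterwards. The standalone sketch below uses plain STL containers as stand-ins for ClickHouse columns to show why that is sufficient: apply the function once per distinct value, then gather through the saved indexes.

#include <cstddef>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

/// Toy stand-in for a LowCardinality column: a dictionary of distinct values plus per-row indexes.
struct LowCardinalityColumn
{
    std::vector<std::string> dictionary;
    std::vector<std::size_t> indexes;
};

LowCardinalityColumn encode(const std::vector<std::string> & rows)
{
    LowCardinalityColumn column;
    std::unordered_map<std::string, std::size_t> positions;
    for (const auto & value : rows)
    {
        auto [it, inserted] = positions.emplace(value, column.dictionary.size());
        if (inserted)
            column.dictionary.push_back(value);
        column.indexes.push_back(it->second);
    }
    return column;
}

int main()
{
    std::vector<std::string> rows = {"moscow", "paris", "moscow", "moscow", "paris"};
    LowCardinalityColumn column = encode(rows);

    /// Execute the "function" (here: string length) only on the dictionary ...
    std::vector<std::size_t> dictionary_result;
    for (const auto & value : column.dictionary)
        dictionary_result.push_back(value.size());

    /// ... and reconstruct the per-row result through the saved indexes.
    for (std::size_t index : column.indexes)
        std::cout << dictionary_result[index] << ' ';
    std::cout << '\n'; /// 6 5 6 6 5
}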
+ auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size()); + lc_arg.column = dict_encoded.dictionary; + indexes = dict_encoded.indexes; + } + + /// The new column will have a different number of rows, normally fewer, but occasionally it might be more (NULL) + input_rows_count = lc_arg.column->size(); + lc_arg.type = low_cardinality_type->getDictionaryType(); + } + + /// Change size of constants for (auto & column : args) { if (const auto * column_const = checkAndGetColumn(column.column.get())) { - column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), num_rows); + column.column = ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), input_rows_count); column.type = recursiveRemoveLowCardinality(column.type); } } @@ -270,6 +301,8 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType bool can_be_executed_on_default_arguments = canBeExecutedOnDefaultArguments(); const auto & dictionary_type = res_low_cardinality_type->getDictionaryType(); + /// The arguments should have been adapted in IFunctionOverloadResolver::getReturnType + /// So there is only one low cardinality column (and optionally some const columns) and no full column ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count); diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index a66456cabee..12931b51df2 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -230,6 +230,17 @@ public: virtual bool isDeterministicInScopeOfQuery() const { return true; } + /** This is a special flag for functions which return a constant value for the server, + * but the result could be different for different servers in a distributed query. + * + * These functions can't support constant folding on the initiator, but can on the follower. + * We can't apply some optimizations as well (e.g. can't remove constant result from GROUP BY key). + * So, it is convenient to have a special flag for them. + * + * Examples are: "__getScalar" and every function from serverConstants.cpp + */ + virtual bool isServerConstant() const { return false; } + /** Lets you know if the function is monotonic in a range of values. * This is used to work with the index in a sorted chunk of data. * And allows to use the index not only when it is written, for example `date >= const`, but also, for example, `toMonth(date) >= 11`.
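Editor's note: the new isServerConstant() flag above marks functions whose result is constant on a given server but may differ between servers in a distributed query, so they may be folded on a follower but not on the initiator. The following standalone sketch shows how such a flag could gate constant folding; ToyFunction, ToyHostName and canConstantFold are simplified stand-ins invented for illustration, not ClickHouse's IFunction interface or optimizer.

#include <iostream>
#include <string>

/// Simplified stand-in for a function interface carrying the new flag.
struct ToyFunction
{
    virtual ~ToyFunction() = default;
    virtual std::string name() const = 0;
    virtual bool isDeterministic() const { return true; }
    /// Constant for this server, but potentially different on other servers of the cluster.
    virtual bool isServerConstant() const { return false; }
};

struct ToyHostName : ToyFunction
{
    std::string name() const override { return "hostName"; }
    bool isServerConstant() const override { return true; }
};

struct ToyAbs : ToyFunction
{
    std::string name() const override { return "abs"; }
};

/// Decide whether a call with only constant arguments may be folded into a literal.
bool canConstantFold(const ToyFunction & function, bool executing_on_initiator)
{
    if (!function.isDeterministic())
        return false;
    /// Folding on the initiator would bake one server's value into the query sent to the others.
    if (function.isServerConstant() && executing_on_initiator)
        return false;
    return true;
}

int main()
{
    ToyHostName host_name;
    ToyAbs abs_function;

    std::cout << std::boolalpha;
    std::cout << canConstantFold(abs_function, /*executing_on_initiator=*/ true) << '\n'; /// true
    std::cout << canConstantFold(host_name, /*executing_on_initiator=*/ true) << '\n';    /// false
    std::cout << canConstantFold(host_name, /*executing_on_initiator=*/ false) << '\n';   /// true
}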
@@ -488,6 +499,7 @@ public: virtual bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const { return false; } virtual bool isDeterministic() const { return true; } virtual bool isDeterministicInScopeOfQuery() const { return true; } + virtual bool isServerConstant() const { return false; } virtual bool isStateful() const { return false; } using ShortCircuitSettings = IFunctionBase::ShortCircuitSettings; diff --git a/src/Functions/IFunctionAdaptors.h b/src/Functions/IFunctionAdaptors.h index 04bd03a776e..c9929a083c1 100644 --- a/src/Functions/IFunctionAdaptors.h +++ b/src/Functions/IFunctionAdaptors.h @@ -86,6 +86,8 @@ public: bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); } + bool isServerConstant() const override { return function->isServerConstant(); } + bool isShortCircuit(ShortCircuitSettings & settings, size_t number_of_arguments) const override { return function->isShortCircuit(settings, number_of_arguments); } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & args) const override { return function->isSuitableForShortCircuitArgumentsExecution(args); } diff --git a/src/Functions/JSONArrayLength.cpp b/src/Functions/JSONArrayLength.cpp index 73dd55f1266..24e93440454 100644 --- a/src/Functions/JSONArrayLength.cpp +++ b/src/Functions/JSONArrayLength.cpp @@ -104,7 +104,7 @@ REGISTER_FUNCTION(JSONArrayLength) .description="Returns the number of elements in the outermost JSON array. The function returns NULL if input JSON string is invalid."}); /// For Spark compatibility. - factory.registerAlias("JSON_ARRAY_LENGTH", "JSONArrayLength", FunctionFactory::CaseInsensitive); + factory.registerAlias("JSON_ARRAY_LENGTH", "JSONArrayLength", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index ef2c788bf43..48daf13ba68 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -4,6 +4,7 @@ #include #include +#include #include #include diff --git a/src/Functions/UTCTimestamp.cpp b/src/Functions/UTCTimestamp.cpp index acc34b0a974..bc8e1b28431 100644 --- a/src/Functions/UTCTimestamp.cpp +++ b/src/Functions/UTCTimestamp.cpp @@ -117,8 +117,8 @@ Example: )", .examples{ {"typical", "SELECT UTCTimestamp();", ""}}, - .categories{"Dates and Times"}}, FunctionFactory::CaseInsensitive); - factory.registerAlias("UTC_timestamp", UTCTimestampOverloadResolver::name, FunctionFactory::CaseInsensitive); + .categories{"Dates and Times"}}, FunctionFactory::Case::Insensitive); + factory.registerAlias("UTC_timestamp", UTCTimestampOverloadResolver::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/UTCTimestampTransform.cpp b/src/Functions/UTCTimestampTransform.cpp index 6d301270d8e..36ec520068f 100644 --- a/src/Functions/UTCTimestampTransform.cpp +++ b/src/Functions/UTCTimestampTransform.cpp @@ -144,8 +144,8 @@ REGISTER_FUNCTION(UTCTimestampTransform) { factory.registerFunction(); factory.registerFunction(); - factory.registerAlias("to_utc_timestamp", NameToUTCTimestamp::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("from_utc_timestamp", NameFromUTCTimestamp::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("to_utc_timestamp", NameToUTCTimestamp::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("from_utc_timestamp", NameFromUTCTimestamp::name, FunctionFactory::Case::Insensitive); } } diff --git 
a/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp index 2c031158c48..a20e1807487 100644 --- a/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp +++ b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp @@ -2,6 +2,7 @@ #include #include +#include #include @@ -184,7 +185,7 @@ ExternalLoader::LoadableMutablePtr ExternalUserDefinedExecutableFunctionsLoader: pool_size = config.getUInt64(key_in_config + ".pool_size", 16); max_command_execution_time = config.getUInt64(key_in_config + ".max_command_execution_time", 10); - size_t max_execution_time_seconds = static_cast(getContext()->getSettings().max_execution_time.totalSeconds()); + size_t max_execution_time_seconds = static_cast(getContext()->getSettingsRef().max_execution_time.totalSeconds()); if (max_execution_time_seconds != 0 && max_command_execution_time > max_execution_time_seconds) max_command_execution_time = max_execution_time_seconds; } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp index c15958f81c9..da79e17f18a 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp index b406cc8d317..4c004d2537c 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp @@ -9,6 +9,8 @@ #include #include +#include + #include #include #include diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp index 766d63eafb0..01e7e3995fa 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp @@ -14,7 +14,7 @@ #include #include #include - +#include namespace DB { diff --git a/src/Functions/abs.cpp b/src/Functions/abs.cpp index 9ac2363f765..742d3b85619 100644 --- a/src/Functions/abs.cpp +++ b/src/Functions/abs.cpp @@ -51,7 +51,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity REGISTER_FUNCTION(Abs) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/acos.cpp b/src/Functions/acos.cpp index bc300ee77fb..39895fed64a 100644 --- a/src/Functions/acos.cpp +++ b/src/Functions/acos.cpp @@ -14,7 +14,7 @@ using FunctionAcos = FunctionMathUnary>; REGISTER_FUNCTION(Acos) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/acosh.cpp b/src/Functions/acosh.cpp index 5b071da9c40..2bab84c77af 100644 --- a/src/Functions/acosh.cpp +++ b/src/Functions/acosh.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct AcoshName - { - static constexpr auto name = "acosh"; - }; - using FunctionAcosh = FunctionMathUnary>; + +struct AcoshName +{ + static constexpr auto name = "acosh"; +}; +using FunctionAcosh = FunctionMathUnary>; } diff --git a/src/Functions/addMicroseconds.cpp b/src/Functions/addMicroseconds.cpp index 0dcd6b4452f..8c0ae06dcd0 100644 --- 
a/src/Functions/addMicroseconds.cpp +++ b/src/Functions/addMicroseconds.cpp @@ -6,6 +6,7 @@ namespace DB { using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval; + REGISTER_FUNCTION(AddMicroseconds) { factory.registerFunction(); diff --git a/src/Functions/addMilliseconds.cpp b/src/Functions/addMilliseconds.cpp index 0e2b696d367..83e1f96ec4b 100644 --- a/src/Functions/addMilliseconds.cpp +++ b/src/Functions/addMilliseconds.cpp @@ -6,6 +6,7 @@ namespace DB { using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval; + REGISTER_FUNCTION(AddMilliseconds) { factory.registerFunction(); diff --git a/src/Functions/addNanoseconds.cpp b/src/Functions/addNanoseconds.cpp index 93eadc814d9..8f9a54752b9 100644 --- a/src/Functions/addNanoseconds.cpp +++ b/src/Functions/addNanoseconds.cpp @@ -6,6 +6,7 @@ namespace DB { using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval; + REGISTER_FUNCTION(AddNanoseconds) { factory.registerFunction(); diff --git a/src/Functions/aes_encrypt_mysql.cpp b/src/Functions/aes_encrypt_mysql.cpp index fb120151c25..33733f92b27 100644 --- a/src/Functions/aes_encrypt_mysql.cpp +++ b/src/Functions/aes_encrypt_mysql.cpp @@ -7,7 +7,6 @@ namespace DB { - namespace { diff --git a/src/Functions/appendTrailingCharIfAbsent.cpp b/src/Functions/appendTrailingCharIfAbsent.cpp index a5554171aaa..0e57d5c55ce 100644 --- a/src/Functions/appendTrailingCharIfAbsent.cpp +++ b/src/Functions/appendTrailingCharIfAbsent.cpp @@ -57,7 +57,7 @@ private: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & column = arguments[0].column; const auto & column_char = arguments[1].column; @@ -80,14 +80,13 @@ private: auto & dst_data = col_res->getChars(); auto & dst_offsets = col_res->getOffsets(); - const auto size = src_offsets.size(); - dst_data.resize(src_data.size() + size); - dst_offsets.resize(size); + dst_data.resize(src_data.size() + input_rows_count); + dst_offsets.resize(input_rows_count); ColumnString::Offset src_offset{}; ColumnString::Offset dst_offset{}; - for (const auto i : collections::range(0, size)) + for (size_t i = 0; i < input_rows_count; ++i) { const auto src_length = src_offsets[i] - src_offset; memcpySmallAllowReadWriteOverflow15(&dst_data[dst_offset], &src_data[src_offset], src_length); diff --git a/src/Functions/array/array.cpp b/src/Functions/array/array.cpp index 03b51808799..dfe589fb74f 100644 --- a/src/Functions/array/array.cpp +++ b/src/Functions/array/array.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/src/Functions/array/arrayFlatten.cpp b/src/Functions/array/arrayFlatten.cpp index d4eb8eebeee..553ad82bd53 100644 --- a/src/Functions/array/arrayFlatten.cpp +++ b/src/Functions/array/arrayFlatten.cpp @@ -123,7 +123,7 @@ private: REGISTER_FUNCTION(ArrayFlatten) { factory.registerFunction(); - factory.registerAlias("flatten", "arrayFlatten", FunctionFactory::CaseInsensitive); + factory.registerAlias("flatten", "arrayFlatten", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp index 10cb51d27d2..fa17aa46464 100644 --- a/src/Functions/array/arrayShuffle.cpp +++ 
b/src/Functions/array/arrayShuffle.cpp @@ -196,7 +196,7 @@ It is possible to override the seed to produce stable results: {"explicit_seed", "SELECT arrayShuffle([1, 2, 3, 4], 41)", ""}, {"materialize", "SELECT arrayShuffle(materialize([1, 2, 3]), 42), arrayShuffle([1, 2, 3], 42) FROM numbers(10)", ""}}, .categories{"Array"}}, - FunctionFactory::CaseInsensitive); + FunctionFactory::Case::Insensitive); factory.registerFunction>( FunctionDocumentation{ @@ -224,7 +224,7 @@ It is possible to override the seed to produce stable results: {"materialize", "SELECT arrayPartialShuffle(materialize([1, 2, 3, 4]), 2, 42), arrayPartialShuffle([1, 2, 3], 2, 42) FROM numbers(10)", ""}}, .categories{"Array"}}, - FunctionFactory::CaseInsensitive); + FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/array/length.cpp b/src/Functions/array/length.cpp index 91a5e5fdec2..d81c071b55e 100644 --- a/src/Functions/array/length.cpp +++ b/src/Functions/array/length.cpp @@ -100,8 +100,8 @@ It is ok to have ASCII NUL bytes in strings, and they will be counted as well. }, .categories{"String", "Array"} }, - FunctionFactory::CaseInsensitive); - factory.registerAlias("OCTET_LENGTH", "length", FunctionFactory::CaseInsensitive); + FunctionFactory::Case::Insensitive); + factory.registerAlias("OCTET_LENGTH", "length", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index 1ac698a2745..b8b03ab5e74 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/ascii.cpp b/src/Functions/ascii.cpp index b43c3221391..0d50e5d203b 100644 --- a/src/Functions/ascii.cpp +++ b/src/Functions/ascii.cpp @@ -45,9 +45,7 @@ struct AsciiImpl size_t size = data.size() / n; for (size_t i = 0; i < size; ++i) - { res[i] = doAscii(data, i * n, n); - } } [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) @@ -90,7 +88,7 @@ If s is empty, the result is 0. 
If the first character is not an ASCII character )", .examples{{"ascii", "SELECT ascii('234')", ""}}, .categories{"String"} - }, FunctionFactory::CaseInsensitive); + }, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/asin.cpp b/src/Functions/asin.cpp index 3049b025d5e..85faf8c275d 100644 --- a/src/Functions/asin.cpp +++ b/src/Functions/asin.cpp @@ -41,7 +41,7 @@ For more details, see [https://en.wikipedia.org/wiki/Inverse_trigonometric_funct {"nan", "SELECT asin(1.1), asin(-2), asin(inf), asin(nan)", ""}}, .categories{"Mathematical", "Trigonometric"} }, - FunctionFactory::CaseInsensitive); + FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/asinh.cpp b/src/Functions/asinh.cpp index 6af832ae07c..b5e3626148f 100644 --- a/src/Functions/asinh.cpp +++ b/src/Functions/asinh.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct AsinhName - { - static constexpr auto name = "asinh"; - }; - using FunctionAsinh = FunctionMathUnary>; + +struct AsinhName +{ + static constexpr auto name = "asinh"; +}; +using FunctionAsinh = FunctionMathUnary>; } diff --git a/src/Functions/atan.cpp b/src/Functions/atan.cpp index 32a0f06db8a..3f74c510487 100644 --- a/src/Functions/atan.cpp +++ b/src/Functions/atan.cpp @@ -14,7 +14,7 @@ using FunctionAtan = FunctionMathUnary>; REGISTER_FUNCTION(Atan) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/atan2.cpp b/src/Functions/atan2.cpp index 7be177f6dfb..218f4c5406f 100644 --- a/src/Functions/atan2.cpp +++ b/src/Functions/atan2.cpp @@ -5,17 +5,18 @@ namespace DB { namespace { - struct Atan2Name - { - static constexpr auto name = "atan2"; - }; - using FunctionAtan2 = FunctionMathBinaryFloat64>; + +struct Atan2Name +{ + static constexpr auto name = "atan2"; +}; +using FunctionAtan2 = FunctionMathBinaryFloat64>; } REGISTER_FUNCTION(Atan2) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/atanh.cpp b/src/Functions/atanh.cpp index fab25414725..a36f5bcbcf0 100644 --- a/src/Functions/atanh.cpp +++ b/src/Functions/atanh.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct AtanhName - { - static constexpr auto name = "atanh"; - }; - using FunctionAtanh = FunctionMathUnary>; + +struct AtanhName +{ + static constexpr auto name = "atanh"; +}; +using FunctionAtanh = FunctionMathUnary>; } diff --git a/src/Functions/base58Encode.cpp b/src/Functions/base58Encode.cpp index cf790ebddab..3ae2fb12c5e 100644 --- a/src/Functions/base58Encode.cpp +++ b/src/Functions/base58Encode.cpp @@ -3,8 +3,10 @@ namespace DB { + REGISTER_FUNCTION(Base58Encode) { factory.registerFunction>(); } + } diff --git a/src/Functions/base64Decode.cpp b/src/Functions/base64Decode.cpp index 50278c4b0b2..349475af3f0 100644 --- a/src/Functions/base64Decode.cpp +++ b/src/Functions/base64Decode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64Decode) { FunctionDocumentation::Description description = R"(Accepts a String and decodes it from base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Throws an exception in case of an error. Alias: FROM_BASE64.)"; @@ -17,8 +18,9 @@ REGISTER_FUNCTION(Base64Decode) factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); /// MySQL compatibility alias. 
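Editor's note: several hunks in this patch (appendTrailingCharIfAbsent above, convertCharset further below) stop deriving the row count from a column's offsets and instead use the input_rows_count parameter of executeImpl. The sketch below uses plain STL containers as a stand-in for ColumnString to show the offsets layout both loops walk: offsets[i] is the end position of row i, so a row's length is the difference of adjacent offsets (the real ColumnString additionally stores a terminating zero byte per row, which is why convertCharset subtracts 1).

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

/// Toy stand-in for ColumnString: all rows concatenated, plus the end offset of each row.
struct ToyStringColumn
{
    std::string chars;                /// concatenated row data
    std::vector<std::size_t> offsets; /// offsets[i] = end of row i within chars
};

ToyStringColumn makeColumn(const std::vector<std::string> & rows)
{
    ToyStringColumn column;
    for (const auto & row : rows)
    {
        column.chars += row;
        column.offsets.push_back(column.chars.size());
    }
    return column;
}

int main()
{
    ToyStringColumn column = makeColumn({"abc", "", "clickhouse"});
    std::size_t input_rows_count = column.offsets.size(); /// in the real code this is passed in by the caller

    std::size_t previous_offset = 0;
    for (std::size_t i = 0; i < input_rows_count; ++i) /// loop bound comes from input_rows_count, not from the column
    {
        std::size_t length = column.offsets[i] - previous_offset;
        std::cout << "row " << i << " length " << length << '\n';
        previous_offset = column.offsets[i];
    }
}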
- factory.registerAlias("FROM_BASE64", "base64Decode", FunctionFactory::CaseInsensitive); + factory.registerAlias("FROM_BASE64", "base64Decode", FunctionFactory::Case::Insensitive); } + } #endif diff --git a/src/Functions/base64Encode.cpp b/src/Functions/base64Encode.cpp index d6e63c38a4c..fe0fa642599 100644 --- a/src/Functions/base64Encode.cpp +++ b/src/Functions/base64Encode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64Encode) { FunctionDocumentation::Description description = R"(Encodes a String as base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Alias: TO_BASE64.)"; @@ -17,8 +18,9 @@ REGISTER_FUNCTION(Base64Encode) factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); /// MySQL compatibility alias. - factory.registerAlias("TO_BASE64", "base64Encode", FunctionFactory::CaseInsensitive); + factory.registerAlias("TO_BASE64", "base64Encode", FunctionFactory::Case::Insensitive); } + } #endif diff --git a/src/Functions/base64URLDecode.cpp b/src/Functions/base64URLDecode.cpp index f5766dc60bd..f256e111619 100644 --- a/src/Functions/base64URLDecode.cpp +++ b/src/Functions/base64URLDecode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64URLDecode) { FunctionDocumentation::Description description = R"(Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; @@ -16,6 +17,7 @@ REGISTER_FUNCTION(Base64URLDecode) factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } + } #endif diff --git a/src/Functions/base64URLEncode.cpp b/src/Functions/base64URLEncode.cpp index 73a465a30c5..215712f7586 100644 --- a/src/Functions/base64URLEncode.cpp +++ b/src/Functions/base64URLEncode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64URLEncode) { FunctionDocumentation::Description description = R"(Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; @@ -16,6 +17,7 @@ REGISTER_FUNCTION(Base64URLEncode) factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } + } #endif diff --git a/src/Functions/byteSize.cpp b/src/Functions/byteSize.cpp index 93a3a86641a..d366a1b2e12 100644 --- a/src/Functions/byteSize.cpp +++ b/src/Functions/byteSize.cpp @@ -67,11 +67,11 @@ public: const IColumn * column = arguments[arg_num].column.get(); if (arg_num == 0) - for (size_t row_num = 0; row_num < input_rows_count; ++row_num) - vec_res[row_num] = column->byteSizeAt(row_num); + for (size_t row = 0; row < input_rows_count; ++row) + vec_res[row] = column->byteSizeAt(row); else - for (size_t row_num = 0; row_num < input_rows_count; ++row_num) - vec_res[row_num] += column->byteSizeAt(row_num); + for (size_t row = 0; row < input_rows_count; ++row) + vec_res[row] += column->byteSizeAt(row); } return result_col; diff --git a/src/Functions/byteSwap.cpp b/src/Functions/byteSwap.cpp index 2a343a07720..2094ec4fa1a 100644 --- a/src/Functions/byteSwap.cpp +++ b/src/Functions/byteSwap.cpp @@ -10,6 +10,7 @@ extern const int NOT_IMPLEMENTED; namespace { + template requires std::is_integral_v T byteSwap(T x) @@ -100,7 +101,7 @@ One use-case of this function is reversing IPv4s: {"64-bit", "SELECT byteSwap(123294967295)", "18439412204227788800"}, }, .categories{"Mathematical", "Arithmetic"}}, - 
FunctionFactory::CaseInsensitive); + FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/caseWithExpression.cpp b/src/Functions/caseWithExpression.cpp index 71fccc8436e..f0a620489ef 100644 --- a/src/Functions/caseWithExpression.cpp +++ b/src/Functions/caseWithExpression.cpp @@ -98,8 +98,7 @@ public: /// Execute transform. ColumnsWithTypeAndName transform_args{args.front(), src_array_col, dst_array_col, args.back()}; - return FunctionFactory::instance().get("transform", context)->build(transform_args) - ->execute(transform_args, result_type, input_rows_count); + return FunctionFactory::instance().get("transform", context)->build(transform_args)->execute(transform_args, result_type, input_rows_count); } private: diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 995b5fa91e7..1ece17ca1e3 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Functions/coalesce.cpp b/src/Functions/coalesce.cpp index 722f32af523..19da6a85b38 100644 --- a/src/Functions/coalesce.cpp +++ b/src/Functions/coalesce.cpp @@ -180,7 +180,7 @@ private: REGISTER_FUNCTION(Coalesce) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/concat.cpp b/src/Functions/concat.cpp index b011c33e02a..5c5e089e740 100644 --- a/src/Functions/concat.cpp +++ b/src/Functions/concat.cpp @@ -240,7 +240,7 @@ private: REGISTER_FUNCTION(Concat) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); factory.registerFunction(); } diff --git a/src/Functions/concatWithSeparator.cpp b/src/Functions/concatWithSeparator.cpp index ed02f331192..1d38ef87558 100644 --- a/src/Functions/concatWithSeparator.cpp +++ b/src/Functions/concatWithSeparator.cpp @@ -193,7 +193,7 @@ The function is named “injective” if it always returns different result for .categories{"String"}}); /// Compatibility with Spark and MySQL: - factory.registerAlias("concat_ws", "concatWithSeparator", FunctionFactory::CaseInsensitive); + factory.registerAlias("concat_ws", "concatWithSeparator", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/connectionId.cpp b/src/Functions/connectionId.cpp index 9c53482482b..c1036b2ddbe 100644 --- a/src/Functions/connectionId.cpp +++ b/src/Functions/connectionId.cpp @@ -33,8 +33,8 @@ public: REGISTER_FUNCTION(ConnectionId) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); - factory.registerAlias("connection_id", "connectionID", FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); + factory.registerAlias("connection_id", "connectionID", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/convertCharset.cpp b/src/Functions/convertCharset.cpp index b3b7394acb9..d998e88e7c2 100644 --- a/src/Functions/convertCharset.cpp +++ b/src/Functions/convertCharset.cpp @@ -88,7 +88,8 @@ private: static void convert(const String & from_charset, const String & to_charset, const ColumnString::Chars & from_chars, const ColumnString::Offsets & from_offsets, - ColumnString::Chars & to_chars, ColumnString::Offsets & to_offsets) + ColumnString::Chars & to_chars, ColumnString::Offsets & to_offsets, + size_t input_rows_count) { auto converter_from = getConverter(from_charset); auto converter_to = getConverter(to_charset); @@ -96,12 +97,11 @@ 
private: ColumnString::Offset current_from_offset = 0; ColumnString::Offset current_to_offset = 0; - size_t size = from_offsets.size(); - to_offsets.resize(size); + to_offsets.resize(input_rows_count); PODArray uchars; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t from_string_size = from_offsets[i] - current_from_offset - 1; @@ -184,7 +184,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & arg_from = arguments[0]; const ColumnWithTypeAndName & arg_charset_from = arguments[1]; @@ -204,7 +204,7 @@ public: if (const ColumnString * col_from = checkAndGetColumn(arg_from.column.get())) { auto col_to = ColumnString::create(); - convert(charset_from, charset_to, col_from->getChars(), col_from->getOffsets(), col_to->getChars(), col_to->getOffsets()); + convert(charset_from, charset_to, col_from->getChars(), col_from->getOffsets(), col_to->getChars(), col_to->getOffsets(), input_rows_count); return col_to; } else diff --git a/src/Functions/cos.cpp b/src/Functions/cos.cpp index 3496373a9d5..40fdede0e1c 100644 --- a/src/Functions/cos.cpp +++ b/src/Functions/cos.cpp @@ -13,7 +13,7 @@ using FunctionCos = FunctionMathUnary>; REGISTER_FUNCTION(Cos) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/cosh.cpp b/src/Functions/cosh.cpp index 54b52051aab..f4302292303 100644 --- a/src/Functions/cosh.cpp +++ b/src/Functions/cosh.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct CoshName - { - static constexpr auto name = "cosh"; - }; - using FunctionCosh = FunctionMathUnary>; + +struct CoshName +{ + static constexpr auto name = "cosh"; +}; +using FunctionCosh = FunctionMathUnary>; } diff --git a/src/Functions/countMatches.cpp b/src/Functions/countMatches.cpp index a8620080012..4db48b1305f 100644 --- a/src/Functions/countMatches.cpp +++ b/src/Functions/countMatches.cpp @@ -22,8 +22,8 @@ namespace DB REGISTER_FUNCTION(CountMatches) { - factory.registerFunction>({}, FunctionFactory::CaseSensitive); - factory.registerFunction>({}, FunctionFactory::CaseSensitive); + factory.registerFunction>(); + factory.registerFunction>(); } } diff --git a/src/Functions/countSubstrings.cpp b/src/Functions/countSubstrings.cpp index 843b81437f5..137edb179b2 100644 --- a/src/Functions/countSubstrings.cpp +++ b/src/Functions/countSubstrings.cpp @@ -19,6 +19,6 @@ using FunctionCountSubstrings = FunctionsStringSearch({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp b/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp index 3f71bca63d2..99ae4f1927e 100644 --- a/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp +++ b/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp @@ -13,8 +13,7 @@ struct NameCountSubstringsCaseInsensitiveUTF8 static constexpr auto name = "countSubstringsCaseInsensitiveUTF8"; }; -using FunctionCountSubstringsCaseInsensitiveUTF8 = FunctionsStringSearch< - CountSubstringsImpl>; +using 
FunctionCountSubstringsCaseInsensitiveUTF8 = FunctionsStringSearch>; } diff --git a/src/Functions/currentDatabase.cpp b/src/Functions/currentDatabase.cpp index 954899c3c2b..16cb43ebb04 100644 --- a/src/Functions/currentDatabase.cpp +++ b/src/Functions/currentDatabase.cpp @@ -54,9 +54,9 @@ public: REGISTER_FUNCTION(CurrentDatabase) { factory.registerFunction(); - factory.registerAlias("DATABASE", FunctionCurrentDatabase::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("SCHEMA", FunctionCurrentDatabase::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("current_database", FunctionCurrentDatabase::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("DATABASE", FunctionCurrentDatabase::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("SCHEMA", FunctionCurrentDatabase::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("current_database", FunctionCurrentDatabase::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/currentSchemas.cpp b/src/Functions/currentSchemas.cpp index 322e719eb17..0a128d0e908 100644 --- a/src/Functions/currentSchemas.cpp +++ b/src/Functions/currentSchemas.cpp @@ -80,8 +80,8 @@ Requires a boolean parameter, but it is ignored actually. It is required just fo {"common", "SELECT current_schemas(true);", "['default']"} } }, - FunctionFactory::CaseInsensitive); - factory.registerAlias("current_schemas", FunctionCurrentSchemas::name, FunctionFactory::CaseInsensitive); + FunctionFactory::Case::Insensitive); + factory.registerAlias("current_schemas", FunctionCurrentSchemas::name, FunctionFactory::Case::Insensitive); } diff --git a/src/Functions/currentUser.cpp b/src/Functions/currentUser.cpp index 1679c56a929..9f48f15ffb3 100644 --- a/src/Functions/currentUser.cpp +++ b/src/Functions/currentUser.cpp @@ -54,8 +54,8 @@ public: REGISTER_FUNCTION(CurrentUser) { factory.registerFunction(); - factory.registerAlias("user", FunctionCurrentUser::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("current_user", FunctionCurrentUser::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("user", FunctionCurrentUser::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("current_user", FunctionCurrentUser::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 8e8865db7ed..157068a7c8d 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -26,8 +26,6 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; extern const int BAD_ARGUMENTS; } @@ -45,84 +43,82 @@ public: template void dispatchForColumns( - const IColumn & x, const IColumn & y, + const IColumn & col_x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { - if (const auto * x_vec_16 = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_16, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_32 = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_32, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_32_s = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_32_s, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_64 = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_64, y, timezone_x, timezone_y, result); - else if (const auto * 
x_const_16 = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_16->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_32 = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_32->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_32_s = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_32_s->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_64 = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_64->getValue>(), y, timezone_x, timezone_y, result); + if (const auto * x_vec_16 = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_16, col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_vec_32 = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_32, col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_vec_32_s = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_32_s, col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_vec_64 = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_64, col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_const_16 = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_16->getValue(), col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_const_32 = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_32->getValue(), col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_const_32_s = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_32_s->getValue(), col_y, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * x_const_64 = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_64->getValue>(), col_y, timezone_x, timezone_y, input_rows_count, result); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64", - name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64", name); } template void dispatchForSecondColumn( - const LeftColumnType & x, const IColumn & y, + const LeftColumnType & x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { - if (const auto * y_vec_16 = checkAndGetColumn(&y)) - vectorVector(x, *y_vec_16, timezone_x, timezone_y, result); - else if (const auto * y_vec_32 = checkAndGetColumn(&y)) - vectorVector(x, *y_vec_32, timezone_x, timezone_y, result); - else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) - vectorVector(x, *y_vec_32_s, timezone_x, timezone_y, result); - else if (const auto * y_vec_64 = checkAndGetColumn(&y)) - vectorVector(x, *y_vec_64, timezone_x, timezone_y, result); - else if (const auto * y_const_16 = checkAndGetColumnConst(&y)) - vectorConstant(x, y_const_16->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_32 = checkAndGetColumnConst(&y)) - vectorConstant(x, y_const_32->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_32_s = checkAndGetColumnConst(&y)) - vectorConstant(x, y_const_32_s->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_64 = checkAndGetColumnConst(&y)) - vectorConstant(x, 
y_const_64->getValue>(), timezone_x, timezone_y, result); + if (const auto * y_vec_16 = checkAndGetColumn(&col_y)) + vectorVector(x, *y_vec_16, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_32 = checkAndGetColumn(&col_y)) + vectorVector(x, *y_vec_32, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_32_s = checkAndGetColumn(&col_y)) + vectorVector(x, *y_vec_32_s, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_64 = checkAndGetColumn(&col_y)) + vectorVector(x, *y_vec_64, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_const_16 = checkAndGetColumnConst(&col_y)) + vectorConstant(x, y_const_16->getValue(), timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_const_32 = checkAndGetColumnConst(&col_y)) + vectorConstant(x, y_const_32->getValue(), timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_const_32_s = checkAndGetColumnConst(&col_y)) + vectorConstant(x, y_const_32_s->getValue(), timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_const_64 = checkAndGetColumnConst(&col_y)) + vectorConstant(x, y_const_64->getValue>(), timezone_x, timezone_y, input_rows_count, result); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", - name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", name); } template void dispatchConstForSecondColumn( - T1 x, const IColumn & y, + T1 x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { - if (const auto * y_vec_16 = checkAndGetColumn(&y)) - constantVector(x, *y_vec_16, timezone_x, timezone_y, result); - else if (const auto * y_vec_32 = checkAndGetColumn(&y)) - constantVector(x, *y_vec_32, timezone_x, timezone_y, result); - else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) - constantVector(x, *y_vec_32_s, timezone_x, timezone_y, result); - else if (const auto * y_vec_64 = checkAndGetColumn(&y)) - constantVector(x, *y_vec_64, timezone_x, timezone_y, result); + if (const auto * y_vec_16 = checkAndGetColumn(&col_y)) + constantVector(x, *y_vec_16, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_32 = checkAndGetColumn(&col_y)) + constantVector(x, *y_vec_32, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_32_s = checkAndGetColumn(&col_y)) + constantVector(x, *y_vec_32_s, timezone_x, timezone_y, input_rows_count, result); + else if (const auto * y_vec_64 = checkAndGetColumn(&col_y)) + constantVector(x, *y_vec_64, timezone_x, timezone_y, input_rows_count, result); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", - name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", name); } template void vectorVector( const LeftColumnType & x, const RightColumnType & y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { const auto & x_data = x.getData(); @@ -130,14 +126,15 @@ public: const auto transform_x = TransformDateTime64(getScale(x)); const 
auto transform_y = TransformDateTime64(getScale(y)); - for (size_t i = 0, size = x.size(); i < size; ++i) - result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y); + for (size_t i = 0; i < input_rows_count; ++i) + result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y); } template void vectorConstant( const LeftColumnType & x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { const auto & x_data = x.getData(); @@ -145,7 +142,7 @@ public: const auto transform_y = TransformDateTime64(getScale(y)); const auto y_value = stripDecimalFieldValue(y); - for (size_t i = 0, size = x.size(); i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) result[i] = calculate(transform_x, transform_y, x_data[i], y_value, timezone_x, timezone_y); } @@ -153,6 +150,7 @@ public: void constantVector( T1 x, const RightColumnType & y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { const auto & y_data = y.getData(); @@ -160,20 +158,22 @@ public: const auto transform_y = TransformDateTime64(getScale(y)); const auto x_value = stripDecimalFieldValue(x); - for (size_t i = 0, size = y.size(); i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) result[i] = calculate(transform_x, transform_y, x_value, y_data[i], timezone_x, timezone_y); } template Int64 calculate(const TransformX & transform_x, const TransformY & transform_y, T1 x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y) const { + auto res = static_cast(transform_y.execute(y, timezone_y)) - static_cast(transform_x.execute(x, timezone_x)); + if constexpr (is_diff) - return static_cast(transform_y.execute(y, timezone_y)) - - static_cast(transform_x.execute(x, timezone_x)); + { + return res; + } else { - auto res = static_cast(transform_y.execute(y, timezone_y)) - - static_cast(transform_x.execute(x, timezone_x)); + /// Adjust res: DateTimeComponentsWithFractionalPart a_comp; DateTimeComponentsWithFractionalPart b_comp; Int64 adjust_value; @@ -332,95 +332,73 @@ public: static constexpr auto name = is_relative ? 
"dateDiff" : "age"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } + String getName() const override { return name; } bool isVariadic() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 3}; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 3 && arguments.size() != 4) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 3 or 4", - getName(), arguments.size()); + FunctionArgumentDescriptors mandatory_args{ + {"unit", static_cast(&isString), nullptr, "String"}, + {"startdate", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + {"enddate", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + }; - if (!isString(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "First argument for function {} (unit) must be String", - getName()); + FunctionArgumentDescriptors optional_args{ + {"timezone", static_cast(&isString), nullptr, "String"}, + }; - if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Second argument for function {} must be Date, Date32, DateTime or DateTime64", - getName()); - - if (!isDate(arguments[2]) && !isDate32(arguments[2]) && !isDateTime(arguments[2]) && !isDateTime64(arguments[2])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument for function {} must be Date, Date32, DateTime or DateTime64", - getName() - ); - - if (arguments.size() == 4 && !isString(arguments[3])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Fourth argument for function {} (timezone) must be String", - getName()); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 3}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * unit_column = checkAndGetColumnConst(arguments[0].column.get()); - if (!unit_column) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "First argument for function {} must be constant String", - getName()); + const auto * col_unit = checkAndGetColumnConst(arguments[0].column.get()); + if (!col_unit) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument for function {} must be constant String", getName()); - String unit = Poco::toLower(unit_column->getValue()); + String unit = Poco::toLower(col_unit->getValue()); - const IColumn & x = *arguments[1].column; - const IColumn & y = *arguments[2].column; + const IColumn & col_x = *arguments[1].column; + const IColumn & col_y = *arguments[2].column; - size_t rows = input_rows_count; - auto res = ColumnInt64::create(rows); + auto col_res = 
ColumnInt64::create(input_rows_count); const auto & timezone_x = extractTimeZoneFromFunctionArguments(arguments, 3, 1); const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2); if (unit == "year" || unit == "years" || unit == "yy" || unit == "yyyy") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "quarter" || unit == "quarters" || unit == "qq" || unit == "q") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "month" || unit == "months" || unit == "mm" || unit == "m") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "week" || unit == "weeks" || unit == "wk" || unit == "ww") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "day" || unit == "days" || unit == "dd" || unit == "d") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "hour" || unit == "hours" || unit == "hh" || unit == "h") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "minute" || unit == "minutes" || unit == "mi" || unit == "n") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "second" || unit == "seconds" || unit == "ss" || unit == "s") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "millisecond" || unit == "milliseconds" || unit == "ms") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "microsecond" || unit == "microseconds" || unit == "us" || unit == "u") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "nanosecond" || unit == "nanoseconds" || unit == "ns") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Function {} does not support '{}' unit", getName(), unit); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit); - return res; + return col_res; } private: DateDiffImpl impl{name}; @@ -437,50 +415,35 @@ public: static constexpr auto name = 
"timeDiff"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } - + String getName() const override { return name; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } bool isVariadic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } size_t getNumberOfArguments() const override { return 2; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 2", - getName(), arguments.size()); + FunctionArgumentDescriptors args{ + {"first_datetime", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + {"second_datetime", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + }; - if (!isDate(arguments[0]) && !isDate32(arguments[0]) && !isDateTime(arguments[0]) && !isDateTime64(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "First argument for function {} must be Date, Date32, DateTime or DateTime64", - getName()); - - if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Second argument for function {} must be Date, Date32, DateTime or DateTime64", - getName() - ); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const IColumn & x = *arguments[0].column; - const IColumn & y = *arguments[1].column; + const IColumn & col_x = *arguments[0].column; + const IColumn & col_y = *arguments[1].column; - size_t rows = input_rows_count; - auto res = ColumnInt64::create(rows); + auto col_res = ColumnInt64::create(input_rows_count); - impl.dispatchForColumns>(x, y, DateLUT::instance(), DateLUT::instance(), res->getData()); + impl.dispatchForColumns>(col_x, col_y, DateLUT::instance(), DateLUT::instance(), input_rows_count, col_res->getData()); - return res; + return col_res; } private: DateDiffImpl impl{name}; @@ -490,7 +453,7 @@ private: REGISTER_FUNCTION(DateDiff) { - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); + factory.registerFunction>({}, FunctionFactory::Case::Insensitive); factory.registerAlias("date_diff", FunctionDateDiff::name); factory.registerAlias("DATE_DIFF", FunctionDateDiff::name); factory.registerAlias("timestampDiff", FunctionDateDiff::name); @@ -509,12 +472,12 @@ Example: )", .examples{ {"typical", "SELECT timeDiff(UTCTimestamp(), now());", ""}}, - .categories{"Dates and Times"}}, FunctionFactory::CaseInsensitive); + .categories{"Dates and Times"}}, FunctionFactory::Case::Insensitive); } REGISTER_FUNCTION(Age) { - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); + factory.registerFunction>({}, FunctionFactory::Case::Insensitive); } } diff --git 
a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index c06dfe15dc4..846cb87f1ee 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -109,14 +109,14 @@ public: ColumnPtr executeImpl( const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, - [[maybe_unused]] size_t input_rows_count) const override + size_t input_rows_count) const override { ColumnPtr res; - if (!((res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)))) + if (!((res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)))) throw Exception( ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Date or DateTime.", @@ -127,7 +127,7 @@ public: } template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { auto * times = checkAndGetColumn(arguments[1].column.get()); if (!times) @@ -144,7 +144,7 @@ public: String date_part = date_part_column->getValue(); const DateLUTImpl * time_zone_tmp; - if (std::is_same_v || std::is_same_v) + if constexpr (std::is_same_v || std::is_same_v) time_zone_tmp = &extractTimeZoneFromFunctionArguments(arguments, 2, 1); else time_zone_tmp = &DateLUT::instance(); @@ -175,7 +175,7 @@ public: using TimeType = DateTypeToTimeType; callOnDatePartWriter(date_part, [&](const auto & writer) { - for (size_t i = 0; i < times_data.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (std::is_same_v) { @@ -354,7 +354,7 @@ private: REGISTER_FUNCTION(DateName) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/date_trunc.cpp b/src/Functions/date_trunc.cpp index b8c60dd164e..dd3ea0b877b 100644 --- a/src/Functions/date_trunc.cpp +++ b/src/Functions/date_trunc.cpp @@ -178,7 +178,7 @@ REGISTER_FUNCTION(DateTrunc) factory.registerFunction(); /// Compatibility alias. 
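The dateDiff and timeDiff hunks above replace hand-written per-argument if/throw checks with a table of argument descriptors passed to validateFunctionArguments. The following is only a sketch of that idea under assumed, simplified types (ArgDescriptor, validateArgs and the string-based type names are mine, not the FunctionArgumentDescriptors API); it shows how declaring arguments as data collapses the validation into one loop.

// Illustrative sketch, not ClickHouse's actual interface.
#include <functional>
#include <stdexcept>
#include <string>
#include <vector>

struct ArgDescriptor
{
    std::string name;                                   // e.g. "unit", "startdate"
    std::function<bool(const std::string &)> is_valid;  // predicate over the argument's type name
    std::string expected;                               // text for the error message
};

void validateArgs(
    const std::string & function_name,
    const std::vector<std::string> & actual_types,
    const std::vector<ArgDescriptor> & mandatory,
    const std::vector<ArgDescriptor> & optional)
{
    if (actual_types.size() < mandatory.size() || actual_types.size() > mandatory.size() + optional.size())
        throw std::invalid_argument("wrong number of arguments for " + function_name);

    for (size_t i = 0; i < actual_types.size(); ++i)
    {
        const ArgDescriptor & d = i < mandatory.size() ? mandatory[i] : optional[i - mandatory.size()];
        if (!d.is_valid(actual_types[i]))
            throw std::invalid_argument("argument '" + d.name + "' of " + function_name + " must be " + d.expected);
    }
}

// Hypothetical usage, mirroring the dateDiff descriptors above:
//   validateArgs("dateDiff", {"String", "DateTime", "DateTime"},
//                {{"unit", is_string, "String"},
//                 {"startdate", is_date_or_datetime, "Date[32] or DateTime[64]"},
//                 {"enddate", is_date_or_datetime, "Date[32] or DateTime[64]"}},
//                {{"timezone", is_string, "String"}});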
- factory.registerAlias("DATE_TRUNC", "dateTrunc", FunctionFactory::CaseInsensitive); + factory.registerAlias("DATE_TRUNC", "dateTrunc", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/degrees.cpp b/src/Functions/degrees.cpp index 3aa20a77a0d..94b5ce3682c 100644 --- a/src/Functions/degrees.cpp +++ b/src/Functions/degrees.cpp @@ -7,23 +7,25 @@ namespace DB { namespace { - struct DegreesName - { - static constexpr auto name = "degrees"; - }; - Float64 degrees(Float64 r) - { - Float64 degrees = r * (180 / M_PI); - return degrees; - } +struct DegreesName +{ + static constexpr auto name = "degrees"; +}; + +Float64 degrees(Float64 r) +{ + Float64 degrees = r * (180 / M_PI); + return degrees; +} + +using FunctionDegrees = FunctionMathUnary>; - using FunctionDegrees = FunctionMathUnary>; } REGISTER_FUNCTION(Degrees) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/exp.cpp b/src/Functions/exp.cpp index d352cda7460..e67cbd6d819 100644 --- a/src/Functions/exp.cpp +++ b/src/Functions/exp.cpp @@ -36,7 +36,7 @@ using FunctionExp = FunctionMathUnary>; REGISTER_FUNCTION(Exp) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/extractAllGroupsVertical.cpp b/src/Functions/extractAllGroupsVertical.cpp index 87a0b4cf7bc..6a968d89354 100644 --- a/src/Functions/extractAllGroupsVertical.cpp +++ b/src/Functions/extractAllGroupsVertical.cpp @@ -18,7 +18,7 @@ namespace DB REGISTER_FUNCTION(ExtractAllGroupsVertical) { factory.registerFunction>(); - factory.registerAlias("extractAllGroups", VerticalImpl::Name, FunctionFactory::CaseSensitive); + factory.registerAlias("extractAllGroups", VerticalImpl::Name); } } diff --git a/src/Functions/factorial.cpp b/src/Functions/factorial.cpp index 7ff9126c004..9b319caad63 100644 --- a/src/Functions/factorial.cpp +++ b/src/Functions/factorial.cpp @@ -106,7 +106,7 @@ The factorial of 0 is 1. 
Likewise, the factorial() function returns 1 for any ne )", .examples{{"factorial", "SELECT factorial(10)", ""}}, .categories{"Mathematical"}}, - FunctionFactory::CaseInsensitive); + FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/filesystem.cpp b/src/Functions/filesystem.cpp index 9fbf9b0cbe7..9b168f3f088 100644 --- a/src/Functions/filesystem.cpp +++ b/src/Functions/filesystem.cpp @@ -91,7 +91,7 @@ public: auto col_res = ColumnVector::create(col_str->size()); auto & data = col_res->getData(); - for (size_t i = 0; i < col_str->size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { auto disk_name = col_str->getDataAt(i).toString(); if (auto it = disk_map.find(disk_name); it != disk_map.end()) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 1c5dc05c5aa..f33b7849a43 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -782,9 +783,9 @@ public: static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } explicit FunctionFormatDateTimeImpl(ContextPtr context) - : mysql_M_is_month_name(context->getSettings().formatdatetime_parsedatetime_m_is_month_name) - , mysql_f_prints_single_zero(context->getSettings().formatdatetime_f_prints_single_zero) - , mysql_format_ckl_without_leading_zeros(context->getSettings().formatdatetime_format_without_leading_zeros) + : mysql_M_is_month_name(context->getSettingsRef().formatdatetime_parsedatetime_m_is_month_name) + , mysql_f_prints_single_zero(context->getSettingsRef().formatdatetime_f_prints_single_zero) + , mysql_format_ckl_without_leading_zeros(context->getSettingsRef().formatdatetime_format_without_leading_zeros) { } @@ -847,7 +848,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, [[maybe_unused]] size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { ColumnPtr res; if constexpr (support_integer == SupportInteger::Yes) @@ -861,17 +862,17 @@ public: if (!castType(arguments[0].type.get(), [&](const auto & type) { using FromDataType = std::decay_t; - if (!(res = executeType(arguments, result_type))) + if (!(res = executeType(arguments, result_type, input_rows_count))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Integer, Date, Date32, DateTime or DateTime64.", arguments[0].column->getName(), getName()); return true; })) { - if (!((res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)))) + if (!((res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Integer or DateTime.", arguments[0].column->getName(), getName()); @@ -880,10 +881,10 @@ public: } else { - if (!((res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)))) + if (!((res = executeType(arguments, 
result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Date or DateTime.", arguments[0].column->getName(), getName()); @@ -893,7 +894,7 @@ public: } template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { auto non_const_datetime = arguments[0].column->convertToFullColumnIfConst(); auto * times = checkAndGetColumn(non_const_datetime.get()); @@ -954,13 +955,11 @@ public: else time_zone = &DateLUT::instance(); - const auto & vec = times->getData(); - auto col_res = ColumnString::create(); auto & res_data = col_res->getChars(); auto & res_offsets = col_res->getOffsets(); - res_data.resize(vec.size() * (out_template_size + 1)); - res_offsets.resize(vec.size()); + res_data.resize(input_rows_count * (out_template_size + 1)); + res_offsets.resize(input_rows_count); if constexpr (format_syntax == FormatSyntax::MySQL) { @@ -989,9 +988,11 @@ public: } } + const auto & vec = times->getData(); + auto * begin = reinterpret_cast(res_data.data()); auto * pos = begin; - for (size_t i = 0; i < vec.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if (!const_time_zone_column && arguments.size() > 2) { @@ -1833,10 +1834,10 @@ using FunctionFromUnixTimestampInJodaSyntax = FunctionFormatDateTimeImpl(); - factory.registerAlias("DATE_FORMAT", FunctionFormatDateTime::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("DATE_FORMAT", FunctionFormatDateTime::name, FunctionFactory::Case::Insensitive); factory.registerFunction(); - factory.registerAlias("FROM_UNIXTIME", FunctionFromUnixTimestamp::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("FROM_UNIXTIME", FunctionFromUnixTimestamp::name, FunctionFactory::Case::Insensitive); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 655ea2e7cde..4e3f302ce36 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -74,7 +75,7 @@ public: if (const ColumnString * col_query_string = checkAndGetColumn(col_query.get())) { auto col_res = ColumnString::create(); - formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map); + formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map, input_rows_count); if (error_handling == ErrorHandling::Null) return ColumnNullable::create(std::move(col_res), std::move(col_null_map)); @@ -91,16 +92,16 @@ private: const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, - ColumnUInt8::MutablePtr & res_null_map) const + ColumnUInt8::MutablePtr & res_null_map, + size_t input_rows_count) const { - const size_t size = offsets.size(); - res_offsets.resize(size); + res_offsets.resize(input_rows_count); res_data.resize(data.size()); size_t prev_offset = 0; size_t res_data_size = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * begin = 
reinterpret_cast(&data[prev_offset]); const char * end = begin + offsets[i] - prev_offset - 1; diff --git a/src/Functions/formatReadable.h b/src/Functions/formatReadable.h index 487ec9d79d0..9161ab43e28 100644 --- a/src/Functions/formatReadable.h +++ b/src/Functions/formatReadable.h @@ -55,19 +55,19 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr res; - if (!((res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)))) + if (!((res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()); @@ -76,7 +76,7 @@ public: private: template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { if (const ColumnVector * col_from = checkAndGetColumn>(arguments[0].column.get())) { @@ -85,13 +85,12 @@ private: const typename ColumnVector::Container & vec_from = col_from->getData(); ColumnString::Chars & data_to = col_to->getChars(); ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - data_to.resize(size * 2); - offsets_to.resize(size); + data_to.resize(input_rows_count * 2); + offsets_to.resize(input_rows_count); WriteBufferFromVector buf_to(data_to); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Impl::format(static_cast(vec_from[i]), buf_to); writeChar(0, buf_to); diff --git a/src/Functions/formatReadableDecimalSize.cpp b/src/Functions/formatReadableDecimalSize.cpp index 1aa5abc526e..9298360aebc 100644 --- a/src/Functions/formatReadableDecimalSize.cpp +++ b/src/Functions/formatReadableDecimalSize.cpp @@ -29,8 +29,7 @@ Accepts the size (number of bytes). 
Returns a rounded size with a suffix (KB, MB .examples{ {"formatReadableDecimalSize", "SELECT formatReadableDecimalSize(1000)", ""}}, .categories{"OtherFunctions"} - }, - FunctionFactory::CaseSensitive); + }); } } diff --git a/src/Functions/formatReadableSize.cpp b/src/Functions/formatReadableSize.cpp index 5c11603e9d7..ee66a0396df 100644 --- a/src/Functions/formatReadableSize.cpp +++ b/src/Functions/formatReadableSize.cpp @@ -22,7 +22,7 @@ namespace REGISTER_FUNCTION(FormatReadableSize) { factory.registerFunction>(); - factory.registerAlias("FORMAT_BYTES", Impl::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("FORMAT_BYTES", Impl::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/fromDaysSinceYearZero.cpp b/src/Functions/fromDaysSinceYearZero.cpp index 0543e6bf229..e1ba9ea533e 100644 --- a/src/Functions/fromDaysSinceYearZero.cpp +++ b/src/Functions/fromDaysSinceYearZero.cpp @@ -125,7 +125,7 @@ The calculation is the same as in MySQL's FROM_DAYS() function. .examples{{"typical", "SELECT fromDaysSinceYearZero32(713569)", "2023-09-08"}}, .categories{"Dates and Times"}}); - factory.registerAlias("FROM_DAYS", FunctionFromDaysSinceYearZero::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("FROM_DAYS", FunctionFromDaysSinceYearZero::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index a171b6bf86e..c95e3edf4ca 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -4,10 +4,10 @@ #include #include #include +#include #include #include "base/types.h" - namespace DB { @@ -96,10 +96,11 @@ struct SnowflakeIdRange /// 1. calculate Snowflake ID by current timestamp (`now`) /// 2. `begin = max(available, now)` /// 3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow -SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t input_rows_count) +SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, uint64_t machine_id, size_t input_rows_count) + { /// 1. `now` - SnowflakeId begin = {.timestamp = getTimestamp(), .machine_id = getMachineId(), .machine_seq_num = 0}; + SnowflakeId begin = {.timestamp = getTimestamp(), .machine_id = machine_id, .machine_seq_num = 0}; /// 2. `begin` if (begin.timestamp <= available.timestamp) @@ -128,13 +129,13 @@ struct Data /// Guarantee counter monotonicity within one timestamp across all threads generating Snowflake IDs simultaneously. 
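The generateSnowflakeID changes above thread an explicit machine_id through getRangeOfAvailableIds and mask it to its lowest 10 bits, while a shared atomic counter hands out disjoint ID ranges to concurrent callers. The sketch below follows the bit widths stated in the function's documentation (timestamp, then 10 bits of machine id, then a 12-bit counter) and the compare_exchange_weak retry loop visible in reserveRange; the helper names are mine, and it deliberately omits the timestamp/overflow handling that getRangeOfAvailableIds performs.

#include <atomic>
#include <cstdint>

constexpr uint64_t machine_id_bits = 10;
constexpr uint64_t counter_bits = 12;

uint64_t pack(uint64_t timestamp_ms, uint64_t machine_id, uint64_t counter)
{
    machine_id &= (1ull << machine_id_bits) - 1;   // keep only the lowest 10 bits, as the new code does
    counter &= (1ull << counter_bits) - 1;
    return (timestamp_ms << (machine_id_bits + counter_bits)) | (machine_id << counter_bits) | counter;
}

// Reserve `count` consecutive IDs starting at the lowest available one.
// If another thread advanced the counter first, compare_exchange_weak refreshes
// `begin` and the loop retries, so concurrent callers get disjoint, increasing ranges.
uint64_t reserveRange(std::atomic<uint64_t> & lowest_available, uint64_t count)
{
    uint64_t begin = lowest_available.load();
    while (!lowest_available.compare_exchange_weak(begin, begin + count))
    {
        // `begin` now holds the current value; retry with it
    }
    return begin;
}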
static inline std::atomic lowest_available_snowflake_id = 0; - SnowflakeId reserveRange(size_t input_rows_count) + SnowflakeId reserveRange(uint64_t machine_id, size_t input_rows_count) { uint64_t available_snowflake_id = lowest_available_snowflake_id.load(); SnowflakeIdRange range; do { - range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), input_rows_count); + range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), machine_id, input_rows_count); } while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, fromSnowflakeId(range.end))); /// CAS failed --> another thread updated `lowest_available_snowflake_id` and we re-try @@ -165,24 +166,32 @@ public: { FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ - {"expr", nullptr, nullptr, "Arbitrary expression"} + {"expr", nullptr, nullptr, "Arbitrary expression"}, + {"machine_id", static_cast(&isNativeUInt), static_cast(&isColumnConst), "const UInt*"} }; validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); - if (input_rows_count != 0) + if (input_rows_count > 0) { vec_to.resize(input_rows_count); + uint64_t machine_id = getMachineId(); + if (arguments.size() == 2) + { + machine_id = arguments[1].column->getUInt(0); + machine_id &= (1ull << machine_id_bits_count) - 1; + } + Data data; - SnowflakeId snowflake_id = data.reserveRange(input_rows_count); /// returns begin of available snowflake ids range + SnowflakeId snowflake_id = data.reserveRange(machine_id, input_rows_count); for (UInt64 & to_row : vec_to) { @@ -208,10 +217,13 @@ public: REGISTER_FUNCTION(GenerateSnowflakeID) { FunctionDocumentation::Description description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 + 1 top zero bits), followed by a machine id (10 bits), and a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; - FunctionDocumentation::Syntax syntax = "generateSnowflakeID([expression])"; - FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}}; + FunctionDocumentation::Syntax syntax = "generateSnowflakeID([expression, [machine_id]])"; + FunctionDocumentation::Arguments arguments = { + {"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}, + {"machine_id", "A machine ID, the lowest 10 bits are used. 
Optional."} + }; FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64"; - FunctionDocumentation::Examples examples = {{"single", "SELECT generateSnowflakeID()", "7201148511606784000"}, {"multiple", "SELECT generateSnowflakeID(1), generateSnowflakeID(2)", ""}}; + FunctionDocumentation::Examples examples = {{"no_arguments", "SELECT generateSnowflakeID()", "7201148511606784000"}, {"with_machine_id", "SELECT generateSnowflakeID(1)", "7201148511606784001"}, {"with_expression_and_machine_id", "SELECT generateSnowflakeID('some_expression', 1)", "7201148511606784002"}}; FunctionDocumentation::Categories categories = {"Snowflake ID"}; factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); diff --git a/src/Functions/generateULID.cpp b/src/Functions/generateULID.cpp index f2f2d8ae3b9..933618ccec3 100644 --- a/src/Functions/generateULID.cpp +++ b/src/Functions/generateULID.cpp @@ -85,8 +85,7 @@ The function returns a value of type FixedString(26). {"ulid", "SELECT generateULID()", ""}, {"multiple", "SELECT generateULID(1), generateULID(2)", ""}}, .categories{"ULID"} - }, - FunctionFactory::CaseSensitive); + }); } } diff --git a/src/Functions/geohashDecode.cpp b/src/Functions/geohashDecode.cpp index 96ad7dacfc4..cace6c09fec 100644 --- a/src/Functions/geohashDecode.cpp +++ b/src/Functions/geohashDecode.cpp @@ -51,21 +51,19 @@ public: } template - bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column) const + bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column, size_t input_rows_count) const { const auto * encoded = checkAndGetColumn(encoded_column); if (!encoded) return false; - const size_t count = encoded->size(); - - auto latitude = ColumnFloat64::create(count); - auto longitude = ColumnFloat64::create(count); + auto latitude = ColumnFloat64::create(input_rows_count); + auto longitude = ColumnFloat64::create(input_rows_count); ColumnFloat64::Container & lon_data = longitude->getData(); ColumnFloat64::Container & lat_data = latitude->getData(); - for (size_t i = 0; i < count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { std::string_view encoded_string = encoded->getDataAt(i).toView(); geohashDecode(encoded_string.data(), encoded_string.size(), &lon_data[i], &lat_data[i]); @@ -79,13 +77,13 @@ public: return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * encoded = arguments[0].column.get(); ColumnPtr res_column; - if (tryExecute(encoded, res_column) || - tryExecute(encoded, res_column)) + if (tryExecute(encoded, res_column, input_rows_count) || + tryExecute(encoded, res_column, input_rows_count)) return res_column; throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported argument type:{} of argument of function {}", diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index 034c8188b63..c49acddd81f 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -53,7 +53,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * longitude = 
arguments[0].column.get(); const IColumn * latitude = arguments[1].column.get(); @@ -65,26 +65,24 @@ public: precision = arguments[2].column; ColumnPtr res_column; - vector(longitude, latitude, precision.get(), res_column); + vector(longitude, latitude, precision.get(), res_column, input_rows_count); return res_column; } private: - void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const + void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result, size_t input_rows_count) const { auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); - const size_t size = lat_column->size(); - - out_offsets.resize(size); - out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); + out_offsets.resize(input_rows_count); + out_vec.resize(input_rows_count * (GEOHASH_MAX_TEXT_LENGTH + 1)); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const Float64 longitude_value = lon_column->getFloat64(i); const Float64 latitude_value = lat_column->getFloat64(i); diff --git a/src/Functions/getClientHTTPHeader.cpp b/src/Functions/getClientHTTPHeader.cpp index ebd070a90b9..140f39d03b8 100644 --- a/src/Functions/getClientHTTPHeader.cpp +++ b/src/Functions/getClientHTTPHeader.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/getMacro.cpp b/src/Functions/getMacro.cpp index 8172fc8ba2e..b7f0b34d652 100644 --- a/src/Functions/getMacro.cpp +++ b/src/Functions/getMacro.cpp @@ -53,6 +53,8 @@ public: /// getMacro may return different values on different shards/replicas, so it's not constant for distributed query bool isSuitableForConstantFolding() const override { return !is_distributed; } + bool isServerConstant() const override { return true; } + size_t getNumberOfArguments() const override { return 1; diff --git a/src/Functions/getScalar.cpp b/src/Functions/getScalar.cpp index 7196cbc0a36..5131dca962e 100644 --- a/src/Functions/getScalar.cpp +++ b/src/Functions/getScalar.cpp @@ -49,6 +49,8 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool isServerConstant() const override { return true; } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 1 || !isString(arguments[0].type) || !arguments[0].column || !isColumnConst(*arguments[0].column)) @@ -105,6 +107,8 @@ public: bool isDeterministic() const override { return false; } + bool isServerConstant() const override { return true; } + bool isSuitableForConstantFolding() const override { return !is_distributed; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/getSetting.cpp b/src/Functions/getSetting.cpp index d6b8946e760..aed6b2119e4 100644 --- a/src/Functions/getSetting.cpp +++ b/src/Functions/getSetting.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/greatest.cpp b/src/Functions/greatest.cpp index 87a48c887b4..88539bda4a5 100644 --- a/src/Functions/greatest.cpp +++ b/src/Functions/greatest.cpp @@ -65,7 +65,7 @@ using FunctionGreatest = FunctionBinaryArithmetic; REGISTER_FUNCTION(Greatest) { - 
factory.registerFunction>({}, FunctionFactory::CaseInsensitive); + factory.registerFunction>({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/hasColumnInTable.cpp b/src/Functions/hasColumnInTable.cpp index 8ea16f688ee..00714997b4a 100644 --- a/src/Functions/hasColumnInTable.cpp +++ b/src/Functions/hasColumnInTable.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Functions/hasSubsequence.cpp b/src/Functions/hasSubsequence.cpp index 4bcce53b4db..1426e8cb7a9 100644 --- a/src/Functions/hasSubsequence.cpp +++ b/src/Functions/hasSubsequence.cpp @@ -24,7 +24,7 @@ using FunctionHasSubsequence = HasSubsequenceImpl({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/hasSubsequenceCaseInsensitive.cpp b/src/Functions/hasSubsequenceCaseInsensitive.cpp index c93bbead58c..8e5751066a9 100644 --- a/src/Functions/hasSubsequenceCaseInsensitive.cpp +++ b/src/Functions/hasSubsequenceCaseInsensitive.cpp @@ -23,7 +23,7 @@ using FunctionHasSubsequenceCaseInsensitive = HasSubsequenceImpl({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp b/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp index 18438bc8b16..039af061bf5 100644 --- a/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp +++ b/src/Functions/hasSubsequenceCaseInsensitiveUTF8.cpp @@ -25,7 +25,7 @@ using FunctionHasSubsequenceCaseInsensitiveUTF8 = HasSubsequenceImpl({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/hasSubsequenceUTF8.cpp b/src/Functions/hasSubsequenceUTF8.cpp index 7a22211eb8c..636fbfab85f 100644 --- a/src/Functions/hasSubsequenceUTF8.cpp +++ b/src/Functions/hasSubsequenceUTF8.cpp @@ -24,7 +24,7 @@ using FunctionHasSubsequenceUTF8 = HasSubsequenceImpl({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/hasToken.cpp b/src/Functions/hasToken.cpp index fa41abf2641..299a8a16b35 100644 --- a/src/Functions/hasToken.cpp +++ b/src/Functions/hasToken.cpp @@ -25,10 +25,10 @@ using FunctionHasTokenOrNull REGISTER_FUNCTION(HasToken) { factory.registerFunction(FunctionDocumentation - {.description="Performs lookup of needle in haystack using tokenbf_v1 index."}, FunctionFactory::CaseSensitive); + {.description="Performs lookup of needle in haystack using tokenbf_v1 index."}); factory.registerFunction(FunctionDocumentation - {.description="Performs lookup of needle in haystack using tokenbf_v1 index. Returns null if needle is ill-formed."}, FunctionFactory::CaseSensitive); + {.description="Performs lookup of needle in haystack using tokenbf_v1 index. 
Returns null if needle is ill-formed."}); } } diff --git a/src/Functions/hasTokenCaseInsensitive.cpp b/src/Functions/hasTokenCaseInsensitive.cpp index 32675b9384d..6ff134194e3 100644 --- a/src/Functions/hasTokenCaseInsensitive.cpp +++ b/src/Functions/hasTokenCaseInsensitive.cpp @@ -26,11 +26,11 @@ REGISTER_FUNCTION(HasTokenCaseInsensitive) { factory.registerFunction( FunctionDocumentation{.description="Performs case insensitive lookup of needle in haystack using tokenbf_v1 index."}, - DB::FunctionFactory::CaseInsensitive); + DB::FunctionFactory::Case::Insensitive); factory.registerFunction( FunctionDocumentation{.description="Performs case insensitive lookup of needle in haystack using tokenbf_v1 index. Returns null if needle is ill-formed."}, - DB::FunctionFactory::CaseInsensitive); + DB::FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/hypot.cpp b/src/Functions/hypot.cpp index 465471cb09b..8845d1fa8ae 100644 --- a/src/Functions/hypot.cpp +++ b/src/Functions/hypot.cpp @@ -15,7 +15,7 @@ namespace REGISTER_FUNCTION(Hypot) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 4c08cd3b931..07dbee27a9d 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -1308,7 +1309,7 @@ public: REGISTER_FUNCTION(If) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } FunctionOverloadResolverPtr createInternalFunctionIfOverloadResolver(bool allow_experimental_variant_type, bool use_variant_as_common_type) diff --git a/src/Functions/ifNull.cpp b/src/Functions/ifNull.cpp index 1093f3f817f..358a52c8394 100644 --- a/src/Functions/ifNull.cpp +++ b/src/Functions/ifNull.cpp @@ -91,7 +91,7 @@ private: REGISTER_FUNCTION(IfNull) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/indexHint.h b/src/Functions/indexHint.h index 3b71c7a5585..a71e1555c78 100644 --- a/src/Functions/indexHint.h +++ b/src/Functions/indexHint.h @@ -42,6 +42,10 @@ public: bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + + bool useDefaultImplementationForSparseColumns() const override { return false; } + bool isSuitableForConstantFolding() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/initcap.cpp b/src/Functions/initcap.cpp index 6b2958227bc..4661ce117c0 100644 --- a/src/Functions/initcap.cpp +++ b/src/Functions/initcap.cpp @@ -60,7 +60,7 @@ using FunctionInitcap = FunctionStringToString; REGISTER_FUNCTION(Initcap) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/initialQueryID.cpp b/src/Functions/initialQueryID.cpp index 9c9390d4e50..f32f92a2f46 100644 --- a/src/Functions/initialQueryID.cpp +++ b/src/Functions/initialQueryID.cpp @@ -41,6 +41,6 @@ public: REGISTER_FUNCTION(InitialQueryID) { factory.registerFunction(); - factory.registerAlias("initial_query_id", FunctionInitialQueryID::name, 
FunctionFactory::CaseInsensitive); + factory.registerAlias("initial_query_id", FunctionInitialQueryID::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/isNotNull.cpp b/src/Functions/isNotNull.cpp index a10e7ebd40c..a48ace4243f 100644 --- a/src/Functions/isNotNull.cpp +++ b/src/Functions/isNotNull.cpp @@ -22,7 +22,9 @@ class FunctionIsNotNull : public IFunction public: static constexpr auto name = "isNotNull"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr context) { return std::make_shared(context->getSettingsRef().allow_experimental_analyzer); } + + explicit FunctionIsNotNull(bool use_analyzer_) : use_analyzer(use_analyzer_) {} std::string getName() const override { @@ -31,6 +33,10 @@ public: ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override { + /// (column IS NULL) triggers a bug in old analyzer when it is replaced to constant. + if (!use_analyzer) + return nullptr; + const ColumnWithTypeAndName & elem = arguments[0]; if (elem.type->onlyNull()) return result_type->createColumnConst(1, UInt8(0)); @@ -123,6 +129,8 @@ private: #endif vectorImpl(null_map, res); } + + bool use_analyzer; }; } diff --git a/src/Functions/isNull.cpp b/src/Functions/isNull.cpp index 95d659b103b..6d799a4ee4e 100644 --- a/src/Functions/isNull.cpp +++ b/src/Functions/isNull.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include namespace DB @@ -21,11 +23,13 @@ class FunctionIsNull : public IFunction public: static constexpr auto name = "isNull"; - static FunctionPtr create(ContextPtr) + static FunctionPtr create(ContextPtr context) { - return std::make_shared(); + return std::make_shared(context->getSettingsRef().allow_experimental_analyzer); } + explicit FunctionIsNull(bool use_analyzer_) : use_analyzer(use_analyzer_) {} + std::string getName() const override { return name; @@ -33,6 +37,10 @@ public: ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override { + /// (column IS NULL) triggers a bug in old analyzer when it is replaced to constant. 
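The isNotNull/isNull/isNullable hunks here all follow one pattern: the setting allow_experimental_analyzer is captured when the function object is created, and the constant-folding shortcut is skipped when the old analyzer is active. A compressed sketch of that pattern, with my own names and simplified types rather than the IFunction interface:

#include <cstdint>
#include <memory>
#include <optional>

class IsNullSketch
{
public:
    explicit IsNullSketch(bool use_analyzer_) : use_analyzer(use_analyzer_) {}

    // nullopt means "do not replace the call with a constant".
    std::optional<uint8_t> constantResultForOnlyNullArgument() const
    {
        if (!use_analyzer)
            return std::nullopt;   // old analyzer: folding is known to misbehave, keep the per-row call
        return 1;                  // an argument whose type can only be NULL is always NULL
    }

private:
    bool use_analyzer;
};

std::unique_ptr<IsNullSketch> createIsNullSketch(bool allow_experimental_analyzer)
{
    return std::make_unique<IsNullSketch>(allow_experimental_analyzer);
}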
+ if (!use_analyzer) + return nullptr; + const ColumnWithTypeAndName & elem = arguments[0]; if (elem.type->onlyNull()) return result_type->createColumnConst(1, UInt8(1)); @@ -95,13 +103,16 @@ public: return DataTypeUInt8().createColumnConst(elem.column->size(), 0u); } } + +private: + bool use_analyzer; }; } REGISTER_FUNCTION(IsNull) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/isNullable.cpp b/src/Functions/isNullable.cpp index b24ee4f5e73..d21ac9cf07c 100644 --- a/src/Functions/isNullable.cpp +++ b/src/Functions/isNullable.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include namespace DB { @@ -14,11 +16,13 @@ class FunctionIsNullable : public IFunction { public: static constexpr auto name = "isNullable"; - static FunctionPtr create(ContextPtr) + static FunctionPtr create(ContextPtr context) { - return std::make_shared(); + return std::make_shared(context->getSettingsRef().allow_experimental_analyzer); } + explicit FunctionIsNullable(bool use_analyzer_) : use_analyzer(use_analyzer_) {} + String getName() const override { return name; @@ -26,6 +30,10 @@ public: ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override { + /// isNullable(column) triggers a bug in old analyzer when it is replaced to constant. + if (!use_analyzer) + return nullptr; + const ColumnWithTypeAndName & elem = arguments[0]; if (elem.type->onlyNull() || canContainNull(*elem.type)) return result_type->createColumnConst(1, UInt8(1)); @@ -60,6 +68,9 @@ public: const auto & elem = arguments[0]; return ColumnUInt8::create(input_rows_count, isColumnNullable(*elem.column) || elem.type->isLowCardinalityNullable()); } + +private: + bool use_analyzer; }; } diff --git a/src/Functions/keyvaluepair/extractKeyValuePairs.cpp b/src/Functions/keyvaluepair/extractKeyValuePairs.cpp index 94f02861af0..1c5164e132d 100644 --- a/src/Functions/keyvaluepair/extractKeyValuePairs.cpp +++ b/src/Functions/keyvaluepair/extractKeyValuePairs.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -240,7 +241,7 @@ REGISTER_FUNCTION(ExtractKeyValuePairs) └──────────────────┘ ```)"} ); - factory.registerAlias("str_to_map", NameExtractKeyValuePairs::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("str_to_map", NameExtractKeyValuePairs::name, FunctionFactory::Case::Insensitive); factory.registerAlias("mapFromString", NameExtractKeyValuePairs::name); } diff --git a/src/Functions/least.cpp b/src/Functions/least.cpp index babb8378d80..091a868e8e2 100644 --- a/src/Functions/least.cpp +++ b/src/Functions/least.cpp @@ -65,7 +65,7 @@ using FunctionLeast = FunctionBinaryArithmetic; REGISTER_FUNCTION(Least) { - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); + factory.registerFunction>({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/left.cpp b/src/Functions/left.cpp index 006706c8f21..c9f62a0f8f1 100644 --- a/src/Functions/left.cpp +++ b/src/Functions/left.cpp @@ -6,8 +6,8 @@ namespace DB REGISTER_FUNCTION(Left) { - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); - factory.registerFunction>({}, FunctionFactory::CaseSensitive); + factory.registerFunction>({}, FunctionFactory::Case::Insensitive); + factory.registerFunction>({}, FunctionFactory::Case::Sensitive); } } diff --git a/src/Functions/lemmatize.cpp b/src/Functions/lemmatize.cpp index e1168e077bb..76e5ed7432e 
100644 --- a/src/Functions/lemmatize.cpp +++ b/src/Functions/lemmatize.cpp @@ -3,6 +3,7 @@ #if USE_NLP #include +#include #include #include #include diff --git a/src/Functions/lengthUTF8.cpp b/src/Functions/lengthUTF8.cpp index 5a4af4934df..59a0d532602 100644 --- a/src/Functions/lengthUTF8.cpp +++ b/src/Functions/lengthUTF8.cpp @@ -83,8 +83,8 @@ REGISTER_FUNCTION(LengthUTF8) factory.registerFunction(); /// Compatibility aliases. - factory.registerAlias("CHAR_LENGTH", "lengthUTF8", FunctionFactory::CaseInsensitive); - factory.registerAlias("CHARACTER_LENGTH", "lengthUTF8", FunctionFactory::CaseInsensitive); + factory.registerAlias("CHAR_LENGTH", "lengthUTF8", FunctionFactory::Case::Insensitive); + factory.registerAlias("CHARACTER_LENGTH", "lengthUTF8", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/locate.cpp b/src/Functions/locate.cpp index d9a727ab3ef..076aa1bdc6d 100644 --- a/src/Functions/locate.cpp +++ b/src/Functions/locate.cpp @@ -29,6 +29,6 @@ REGISTER_FUNCTION(Locate) FunctionDocumentation::Categories doc_categories = {"String search"}; - factory.registerFunction({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive); + factory.registerFunction({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/log.cpp b/src/Functions/log.cpp index 9096b8c6f22..8bebdb8d7bd 100644 --- a/src/Functions/log.cpp +++ b/src/Functions/log.cpp @@ -34,8 +34,8 @@ using FunctionLog = FunctionMathUnary>; REGISTER_FUNCTION(Log) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); - factory.registerAlias("ln", "log", FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); + factory.registerAlias("ln", "log", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/log10.cpp b/src/Functions/log10.cpp index 5dfe4ac9357..6241df3e092 100644 --- a/src/Functions/log10.cpp +++ b/src/Functions/log10.cpp @@ -13,7 +13,7 @@ using FunctionLog10 = FunctionMathUnary({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/log2.cpp b/src/Functions/log2.cpp index 9457ac64bc6..52b3ab52ea7 100644 --- a/src/Functions/log2.cpp +++ b/src/Functions/log2.cpp @@ -13,7 +13,7 @@ using FunctionLog2 = FunctionMathUnary>; REGISTER_FUNCTION(Log2) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/lower.cpp b/src/Functions/lower.cpp index 38ae5a8a7f0..5210a20b026 100644 --- a/src/Functions/lower.cpp +++ b/src/Functions/lower.cpp @@ -19,8 +19,8 @@ using FunctionLower = FunctionStringToString, NameLower REGISTER_FUNCTION(Lower) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); - factory.registerAlias("lcase", NameLower::name, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); + factory.registerAlias("lcase", NameLower::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index 41a09793994..21d466d7708 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -724,7 +724,7 @@ public: REGISTER_FUNCTION(MakeDate) { - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); + factory.registerFunction>({}, 
FunctionFactory::Case::Insensitive); factory.registerFunction>(); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 66cd10a3f0b..0d42ce2abe1 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/match.cpp b/src/Functions/match.cpp index c719cc6dd82..6cd65597032 100644 --- a/src/Functions/match.cpp +++ b/src/Functions/match.cpp @@ -20,7 +20,7 @@ using FunctionMatch = FunctionsStringSearch(); - factory.registerAlias("REGEXP_MATCHES", NameMatch::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("REGEXP_MATCHES", NameMatch::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/mathConstants.cpp b/src/Functions/mathConstants.cpp index 2b199a30616..37ababbc0e5 100644 --- a/src/Functions/mathConstants.cpp +++ b/src/Functions/mathConstants.cpp @@ -44,7 +44,7 @@ REGISTER_FUNCTION(E) REGISTER_FUNCTION(Pi) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/max2.cpp b/src/Functions/max2.cpp index 928e6f22918..88b5c7c08c0 100644 --- a/src/Functions/max2.cpp +++ b/src/Functions/max2.cpp @@ -21,6 +21,6 @@ namespace REGISTER_FUNCTION(Max2) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/min2.cpp b/src/Functions/min2.cpp index f031530edf5..8ab56dbe90d 100644 --- a/src/Functions/min2.cpp +++ b/src/Functions/min2.cpp @@ -22,6 +22,6 @@ namespace REGISTER_FUNCTION(Min2) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index ebc1c4f5275..76a07aeda2e 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -155,7 +155,7 @@ using FunctionModulo = BinaryArithmeticOverloadResolver(); - factory.registerAlias("mod", "modulo", FunctionFactory::CaseInsensitive); + factory.registerAlias("mod", "modulo", FunctionFactory::Case::Insensitive); } struct NameModuloLegacy { static constexpr auto name = "moduloLegacy"; }; @@ -183,11 +183,11 @@ In other words, the function returning the modulus (modulo) in the terms of Modu )", .examples{{"positiveModulo", "SELECT positiveModulo(-1, 10);", ""}}, .categories{"Arithmetic"}}, - FunctionFactory::CaseInsensitive); + FunctionFactory::Case::Insensitive); - factory.registerAlias("positive_modulo", "positiveModulo", FunctionFactory::CaseInsensitive); + factory.registerAlias("positive_modulo", "positiveModulo", FunctionFactory::Case::Insensitive); /// Compatibility with Spark: - factory.registerAlias("pmod", "positiveModulo", FunctionFactory::CaseInsensitive); + factory.registerAlias("pmod", "positiveModulo", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/monthName.cpp b/src/Functions/monthName.cpp index f49f77bd6e7..ae444460170 100644 --- a/src/Functions/monthName.cpp +++ b/src/Functions/monthName.cpp @@ -74,7 +74,7 @@ private: REGISTER_FUNCTION(MonthName) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index d3bf5618f66..c4b675fcf6c 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ 
-5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/neighbor.cpp b/src/Functions/neighbor.cpp index 62f129109f9..49b8980e7e2 100644 --- a/src/Functions/neighbor.cpp +++ b/src/Functions/neighbor.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Functions/now.cpp b/src/Functions/now.cpp index 827b800a243..7b2150e3534 100644 --- a/src/Functions/now.cpp +++ b/src/Functions/now.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -90,7 +91,7 @@ public: size_t getNumberOfArguments() const override { return 0; } static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique(context); } explicit NowOverloadResolver(ContextPtr context) - : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) {} DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override @@ -137,8 +138,8 @@ private: REGISTER_FUNCTION(Now) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); - factory.registerAlias("current_timestamp", NowOverloadResolver::name, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); + factory.registerAlias("current_timestamp", NowOverloadResolver::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/now64.cpp b/src/Functions/now64.cpp index d6f8474c984..9786a0c9f39 100644 --- a/src/Functions/now64.cpp +++ b/src/Functions/now64.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -118,7 +119,7 @@ public: size_t getNumberOfArguments() const override { return 0; } static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique(context); } explicit Now64OverloadResolver(ContextPtr context) - : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) {} DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override @@ -169,7 +170,7 @@ private: REGISTER_FUNCTION(Now64) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/nowInBlock.cpp b/src/Functions/nowInBlock.cpp index 74f420986c8..291e43bdce5 100644 --- a/src/Functions/nowInBlock.cpp +++ b/src/Functions/nowInBlock.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -31,7 +32,7 @@ public: return std::make_shared(context); } explicit FunctionNowInBlock(ContextPtr context) - : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) {} String getName() const override diff --git a/src/Functions/nullIf.cpp b/src/Functions/nullIf.cpp index 392cc20cfcf..550287885a1 100644 --- a/src/Functions/nullIf.cpp +++ b/src/Functions/nullIf.cpp @@ -69,7 +69,7 @@ public: REGISTER_FUNCTION(NullIf) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/padString.cpp b/src/Functions/padString.cpp index 8670c837e21..23554c3fbbc 100644 --- a/src/Functions/padString.cpp +++ b/src/Functions/padString.cpp @@ -335,8 +335,8 @@ 
REGISTER_FUNCTION(PadString) factory.registerFunction>(); /// rightPad factory.registerFunction>(); /// rightPadUTF8 - factory.registerAlias("lpad", "leftPad", FunctionFactory::CaseInsensitive); - factory.registerAlias("rpad", "rightPad", FunctionFactory::CaseInsensitive); + factory.registerAlias("lpad", "leftPad", FunctionFactory::Case::Insensitive); + factory.registerAlias("rpad", "rightPad", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 339eb4cb26c..bdca0151bba 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -564,8 +565,8 @@ namespace static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } explicit FunctionParseDateTimeImpl(ContextPtr context) - : mysql_M_is_month_name(context->getSettings().formatdatetime_parsedatetime_m_is_month_name) - , mysql_parse_ckl_without_leading_zeros(context->getSettings().parsedatetime_parse_without_leading_zeros) + : mysql_M_is_month_name(context->getSettingsRef().formatdatetime_parsedatetime_m_is_month_name) + , mysql_parse_ckl_without_leading_zeros(context->getSettingsRef().parsedatetime_parse_without_leading_zeros) { } @@ -2097,10 +2098,10 @@ namespace REGISTER_FUNCTION(ParseDateTime) { factory.registerFunction(); - factory.registerAlias("TO_UNIXTIME", FunctionParseDateTime::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("TO_UNIXTIME", FunctionParseDateTime::name, FunctionFactory::Case::Insensitive); factory.registerFunction(); factory.registerFunction(); - factory.registerAlias("str_to_date", FunctionParseDateTimeOrNull::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("str_to_date", FunctionParseDateTimeOrNull::name, FunctionFactory::Case::Insensitive); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/partitionId.cpp b/src/Functions/partitionId.cpp index e2e9038cd8b..dab56f39adc 100644 --- a/src/Functions/partitionId.cpp +++ b/src/Functions/partitionId.cpp @@ -66,6 +66,7 @@ public: REGISTER_FUNCTION(PartitionId) { factory.registerFunction(); + factory.registerAlias("partitionID", "partitionId"); } } diff --git a/src/Functions/pointInPolygon.cpp b/src/Functions/pointInPolygon.cpp index 6b413829bd1..32d5905a513 100644 --- a/src/Functions/pointInPolygon.cpp +++ b/src/Functions/pointInPolygon.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/position.cpp b/src/Functions/position.cpp index 29a5db2eb24..aad47cc5b3f 100644 --- a/src/Functions/position.cpp +++ b/src/Functions/position.cpp @@ -19,6 +19,6 @@ using FunctionPosition = FunctionsStringSearch({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/positionCaseInsensitive.cpp b/src/Functions/positionCaseInsensitive.cpp index f71ce0078cc..7c59ffa83cd 100644 --- a/src/Functions/positionCaseInsensitive.cpp +++ b/src/Functions/positionCaseInsensitive.cpp @@ -20,6 +20,6 @@ using FunctionPositionCaseInsensitive = FunctionsStringSearch(); - factory.registerAlias("instr", NamePositionCaseInsensitive::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("instr", NamePositionCaseInsensitive::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/pow.cpp b/src/Functions/pow.cpp index 9b383da97e7..f2976b4812e 100644 --- a/src/Functions/pow.cpp +++ 
b/src/Functions/pow.cpp @@ -13,8 +13,8 @@ using FunctionPow = FunctionMathBinaryFloat64({}, FunctionFactory::CaseInsensitive); - factory.registerAlias("power", "pow", FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); + factory.registerAlias("power", "pow", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/queryID.cpp b/src/Functions/queryID.cpp index 5d0ac719797..7299714e464 100644 --- a/src/Functions/queryID.cpp +++ b/src/Functions/queryID.cpp @@ -41,6 +41,6 @@ public: REGISTER_FUNCTION(QueryID) { factory.registerFunction(); - factory.registerAlias("query_id", FunctionQueryID::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("query_id", FunctionQueryID::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/radians.cpp b/src/Functions/radians.cpp index 2c2c2743532..9185340be15 100644 --- a/src/Functions/radians.cpp +++ b/src/Functions/radians.cpp @@ -23,7 +23,7 @@ namespace REGISTER_FUNCTION(Radians) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/rand.cpp b/src/Functions/rand.cpp index ea30922d731..35b325e59fd 100644 --- a/src/Functions/rand.cpp +++ b/src/Functions/rand.cpp @@ -13,7 +13,7 @@ using FunctionRand = FunctionRandom; REGISTER_FUNCTION(Rand) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); factory.registerAlias("rand32", NameRand::name); } diff --git a/src/Functions/regexpExtract.cpp b/src/Functions/regexpExtract.cpp index 3cc5393296c..6bedac54e39 100644 --- a/src/Functions/regexpExtract.cpp +++ b/src/Functions/regexpExtract.cpp @@ -253,7 +253,7 @@ REGISTER_FUNCTION(RegexpExtract) FunctionDocumentation{.description="Extracts the first string in haystack that matches the regexp pattern and corresponds to the regex group index."}); /// For Spark compatibility. 
- factory.registerAlias("REGEXP_EXTRACT", "regexpExtract", FunctionFactory::CaseInsensitive); + factory.registerAlias("REGEXP_EXTRACT", "regexpExtract", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index aa90bf2490d..c001959b465 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -278,7 +278,7 @@ public: REGISTER_FUNCTION(Repeat) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/replaceAll.cpp b/src/Functions/replaceAll.cpp index 6c06f5984b3..9ce525390bf 100644 --- a/src/Functions/replaceAll.cpp +++ b/src/Functions/replaceAll.cpp @@ -20,7 +20,7 @@ using FunctionReplaceAll = FunctionStringReplace(); - factory.registerAlias("replace", NameReplaceAll::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("replace", NameReplaceAll::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/replaceRegexpAll.cpp b/src/Functions/replaceRegexpAll.cpp index f5f56fb0f35..77f21b6efee 100644 --- a/src/Functions/replaceRegexpAll.cpp +++ b/src/Functions/replaceRegexpAll.cpp @@ -20,7 +20,7 @@ using FunctionReplaceRegexpAll = FunctionStringReplace(); - factory.registerAlias("REGEXP_REPLACE", NameReplaceRegexpAll::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("REGEXP_REPLACE", NameReplaceRegexpAll::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/reverse.cpp b/src/Functions/reverse.cpp index 39608b77997..d23e48b8d42 100644 --- a/src/Functions/reverse.cpp +++ b/src/Functions/reverse.cpp @@ -113,7 +113,7 @@ private: REGISTER_FUNCTION(Reverse) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/right.cpp b/src/Functions/right.cpp index a8ab4bf9685..ef3303ab968 100644 --- a/src/Functions/right.cpp +++ b/src/Functions/right.cpp @@ -6,8 +6,8 @@ namespace DB REGISTER_FUNCTION(Right) { - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); - factory.registerFunction>({}, FunctionFactory::CaseSensitive); + factory.registerFunction>({}, FunctionFactory::Case::Insensitive); + factory.registerFunction>({}, FunctionFactory::Case::Sensitive); } } diff --git a/src/Functions/runningAccumulate.cpp b/src/Functions/runningAccumulate.cpp index d585affd91b..c94cbe67216 100644 --- a/src/Functions/runningAccumulate.cpp +++ b/src/Functions/runningAccumulate.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Functions/runningDifference.h b/src/Functions/runningDifference.h index fe477d13744..d51ea805f3c 100644 --- a/src/Functions/runningDifference.h +++ b/src/Functions/runningDifference.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include #include diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index e7e423058f1..fe999d66701 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -20,117 +21,125 @@ namespace DB namespace { + template + class FunctionServerConstantBase : public FunctionConstantBase + { + public: + using FunctionConstantBase::FunctionConstantBase; + bool isServerConstant() const override { return true; } + }; + #if defined(__ELF__) && !defined(OS_FREEBSD) /// buildId() - returns the compiler build id of the running 
binary. - class FunctionBuildId : public FunctionConstantBase + class FunctionBuildId : public FunctionServerConstantBase { public: static constexpr auto name = "buildId"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionBuildId(ContextPtr context) : FunctionConstantBase(SymbolIndex::instance().getBuildIDHex(), context->isDistributed()) {} + explicit FunctionBuildId(ContextPtr context) : FunctionServerConstantBase(SymbolIndex::instance().getBuildIDHex(), context->isDistributed()) {} }; #endif /// Get the host name. It is constant on single server, but is not constant in distributed queries. - class FunctionHostName : public FunctionConstantBase + class FunctionHostName : public FunctionServerConstantBase { public: static constexpr auto name = "hostName"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionHostName(ContextPtr context) : FunctionConstantBase(DNSResolver::instance().getHostName(), context->isDistributed()) {} + explicit FunctionHostName(ContextPtr context) : FunctionServerConstantBase(DNSResolver::instance().getHostName(), context->isDistributed()) {} }; - class FunctionServerUUID : public FunctionConstantBase + class FunctionServerUUID : public FunctionServerConstantBase { public: static constexpr auto name = "serverUUID"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionServerUUID(ContextPtr context) : FunctionConstantBase(ServerUUID::get(), context->isDistributed()) {} + explicit FunctionServerUUID(ContextPtr context) : FunctionServerConstantBase(ServerUUID::get(), context->isDistributed()) {} }; - class FunctionTCPPort : public FunctionConstantBase + class FunctionTCPPort : public FunctionServerConstantBase { public: static constexpr auto name = "tcpPort"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionTCPPort(ContextPtr context) : FunctionConstantBase(context->getTCPPort(), context->isDistributed()) {} + explicit FunctionTCPPort(ContextPtr context) : FunctionServerConstantBase(context->getTCPPort(), context->isDistributed()) {} }; /// Returns timezone for current session. - class FunctionTimezone : public FunctionConstantBase + class FunctionTimezone : public FunctionServerConstantBase { public: static constexpr auto name = "timezone"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionTimezone(ContextPtr context) : FunctionConstantBase(DateLUT::instance().getTimeZone(), context->isDistributed()) {} + explicit FunctionTimezone(ContextPtr context) : FunctionServerConstantBase(DateLUT::instance().getTimeZone(), context->isDistributed()) {} }; /// Returns the server time zone (timezone in which server runs). - class FunctionServerTimezone : public FunctionConstantBase + class FunctionServerTimezone : public FunctionServerConstantBase { public: static constexpr auto name = "serverTimezone"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionServerTimezone(ContextPtr context) : FunctionConstantBase(DateLUT::serverTimezoneInstance().getTimeZone(), context->isDistributed()) {} + explicit FunctionServerTimezone(ContextPtr context) : FunctionServerConstantBase(DateLUT::serverTimezoneInstance().getTimeZone(), context->isDistributed()) {} }; /// Returns server uptime in seconds. 
- class FunctionUptime : public FunctionConstantBase + class FunctionUptime : public FunctionServerConstantBase { public: static constexpr auto name = "uptime"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionUptime(ContextPtr context) : FunctionConstantBase(context->getUptimeSeconds(), context->isDistributed()) {} + explicit FunctionUptime(ContextPtr context) : FunctionServerConstantBase(context->getUptimeSeconds(), context->isDistributed()) {} }; /// version() - returns the current version as a string. - class FunctionVersion : public FunctionConstantBase + class FunctionVersion : public FunctionServerConstantBase { public: static constexpr auto name = "version"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionVersion(ContextPtr context) : FunctionConstantBase(VERSION_STRING, context->isDistributed()) {} + explicit FunctionVersion(ContextPtr context) : FunctionServerConstantBase(VERSION_STRING, context->isDistributed()) {} }; /// revision() - returns the current revision. - class FunctionRevision : public FunctionConstantBase + class FunctionRevision : public FunctionServerConstantBase { public: static constexpr auto name = "revision"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionRevision(ContextPtr context) : FunctionConstantBase(ClickHouseRevision::getVersionRevision(), context->isDistributed()) {} + explicit FunctionRevision(ContextPtr context) : FunctionServerConstantBase(ClickHouseRevision::getVersionRevision(), context->isDistributed()) {} }; - class FunctionZooKeeperSessionUptime : public FunctionConstantBase + class FunctionZooKeeperSessionUptime : public FunctionServerConstantBase { public: static constexpr auto name = "zookeeperSessionUptime"; explicit FunctionZooKeeperSessionUptime(ContextPtr context) - : FunctionConstantBase(context->getZooKeeperSessionUptime(), context->isDistributed()) + : FunctionServerConstantBase(context->getZooKeeperSessionUptime(), context->isDistributed()) { } static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } }; - class FunctionGetOSKernelVersion : public FunctionConstantBase + class FunctionGetOSKernelVersion : public FunctionServerConstantBase { public: static constexpr auto name = "getOSKernelVersion"; - explicit FunctionGetOSKernelVersion(ContextPtr context) : FunctionConstantBase(Poco::Environment::osName() + " " + Poco::Environment::osVersion(), context->isDistributed()) {} + explicit FunctionGetOSKernelVersion(ContextPtr context) : FunctionServerConstantBase(Poco::Environment::osName() + " " + Poco::Environment::osVersion(), context->isDistributed()) {} static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } }; - class FunctionDisplayName : public FunctionConstantBase + class FunctionDisplayName : public FunctionServerConstantBase { public: static constexpr auto name = "displayName"; - explicit FunctionDisplayName(ContextPtr context) : FunctionConstantBase(context->getConfigRef().getString("display_name", getFQDNOrHostName()), context->isDistributed()) {} + explicit FunctionDisplayName(ContextPtr context) : FunctionServerConstantBase(context->getConfigRef().getString("display_name", getFQDNOrHostName()), context->isDistributed()) {} static FunctionPtr create(ContextPtr context) {return std::make_shared(context); } }; } @@ -197,12 +206,12 @@ REGISTER_FUNCTION(Uptime) REGISTER_FUNCTION(Version) { - 
factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } REGISTER_FUNCTION(Revision) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } REGISTER_FUNCTION(ZooKeeperSessionUptime) @@ -228,8 +237,7 @@ Returns the value of `display_name` from config or server FQDN if not set. )", .examples{{"displayName", "SELECT displayName();", ""}}, .categories{"Constant", "Miscellaneous"} - }, - FunctionFactory::CaseSensitive); + }); } diff --git a/src/Functions/sign.cpp b/src/Functions/sign.cpp index 3dd2ac8e3aa..914e1ad9e1f 100644 --- a/src/Functions/sign.cpp +++ b/src/Functions/sign.cpp @@ -44,7 +44,7 @@ struct FunctionUnaryArithmeticMonotonicity REGISTER_FUNCTION(Sign) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/sin.cpp b/src/Functions/sin.cpp index 914f431adb4..945486b26a8 100644 --- a/src/Functions/sin.cpp +++ b/src/Functions/sin.cpp @@ -21,7 +21,7 @@ REGISTER_FUNCTION(Sin) .returned_value = "The sine of x.", .examples = {{.name = "simple", .query = "SELECT sin(1.23)", .result = "0.9424888019316975"}}, .categories{"Mathematical", "Trigonometric"}}, - FunctionFactory::CaseInsensitive); + FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index 22748e86888..62ee19fa904 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp index 31ea6a28ece..0d4d65ee4b0 100644 --- a/src/Functions/snowflake.cpp +++ b/src/Functions/snowflake.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include diff --git a/src/Functions/snowflakeIDToDateTime.cpp b/src/Functions/snowflakeIDToDateTime.cpp index 9a1d5b8a74b..6c57c72fff1 100644 --- a/src/Functions/snowflakeIDToDateTime.cpp +++ b/src/Functions/snowflakeIDToDateTime.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -38,7 +39,7 @@ public: static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } explicit FunctionSnowflakeIDToDateTime(ContextPtr context) - : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) {} String getName() const override { return name; } @@ -109,7 +110,7 @@ public: static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } explicit FunctionSnowflakeIDToDateTime64(ContextPtr context) - : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) {} String getName() const override { return name; } diff --git a/src/Functions/soundex.cpp b/src/Functions/soundex.cpp index 77ddb14a6ec..fcf1523d1a3 100644 --- a/src/Functions/soundex.cpp +++ b/src/Functions/soundex.cpp @@ -112,7 +112,7 @@ struct NameSoundex REGISTER_FUNCTION(Soundex) { factory.registerFunction>( - FunctionDocumentation{.description="Returns Soundex code of a string."}, FunctionFactory::CaseInsensitive); + FunctionDocumentation{.description="Returns Soundex code of a string."}, FunctionFactory::Case::Insensitive); } diff --git a/src/Functions/space.cpp 
b/src/Functions/space.cpp index ce12f2f541c..cd6ca73c088 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -173,7 +173,7 @@ public: REGISTER_FUNCTION(Space) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/sqrt.cpp b/src/Functions/sqrt.cpp index 3c50f994391..a6e2dee71d9 100644 --- a/src/Functions/sqrt.cpp +++ b/src/Functions/sqrt.cpp @@ -13,7 +13,7 @@ using FunctionSqrt = FunctionMathUnary>; REGISTER_FUNCTION(Sqrt) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/stem.cpp b/src/Functions/stem.cpp index d326d17a7a8..5b845cf332b 100644 --- a/src/Functions/stem.cpp +++ b/src/Functions/stem.cpp @@ -3,6 +3,7 @@ #if USE_NLP #include +#include #include #include #include diff --git a/src/Functions/structureToFormatSchema.cpp b/src/Functions/structureToFormatSchema.cpp index 406da372c04..4fc2bf707a4 100644 --- a/src/Functions/structureToFormatSchema.cpp +++ b/src/Functions/structureToFormatSchema.cpp @@ -116,8 +116,7 @@ Function that converts ClickHouse table structure to CapnProto format schema "}"}, }, .categories{"Other"} - }, - FunctionFactory::CaseSensitive); + }); } @@ -138,8 +137,7 @@ Function that converts ClickHouse table structure to Protobuf format schema "}"}, }, .categories{"Other"} - }, - FunctionFactory::CaseSensitive); + }); } } diff --git a/src/Functions/substring.cpp b/src/Functions/substring.cpp index f1dea7db018..51980eb6b9c 100644 --- a/src/Functions/substring.cpp +++ b/src/Functions/substring.cpp @@ -201,12 +201,12 @@ public: REGISTER_FUNCTION(Substring) { - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); - factory.registerAlias("substr", "substring", FunctionFactory::CaseInsensitive); // MySQL alias - factory.registerAlias("mid", "substring", FunctionFactory::CaseInsensitive); /// MySQL alias - factory.registerAlias("byteSlice", "substring", FunctionFactory::CaseInsensitive); /// resembles PostgreSQL's get_byte function, similar to ClickHouse's bitSlice + factory.registerFunction>({}, FunctionFactory::Case::Insensitive); + factory.registerAlias("substr", "substring", FunctionFactory::Case::Insensitive); // MySQL alias + factory.registerAlias("mid", "substring", FunctionFactory::Case::Insensitive); /// MySQL alias + factory.registerAlias("byteSlice", "substring", FunctionFactory::Case::Insensitive); /// resembles PostgreSQL's get_byte function, similar to ClickHouse's bitSlice - factory.registerFunction>({}, FunctionFactory::CaseSensitive); + factory.registerFunction>(); } } diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index 15a321bd5b0..eccd849059b 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -314,7 +314,7 @@ REGISTER_FUNCTION(SubstringIndex) factory.registerFunction>(); /// substringIndex factory.registerFunction>(); /// substringIndexUTF8 - factory.registerAlias("SUBSTRING_INDEX", "substringIndex", FunctionFactory::CaseInsensitive); + factory.registerAlias("SUBSTRING_INDEX", "substringIndex", FunctionFactory::Case::Insensitive); } diff --git a/src/Functions/synonyms.cpp b/src/Functions/synonyms.cpp index 6b3e205785c..18c1557115f 100644 --- a/src/Functions/synonyms.cpp +++ b/src/Functions/synonyms.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -120,7 +121,7 @@ public: 
REGISTER_FUNCTION(Synonyms) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/tan.cpp b/src/Functions/tan.cpp index e39f8598419..51cf0bbcceb 100644 --- a/src/Functions/tan.cpp +++ b/src/Functions/tan.cpp @@ -13,7 +13,7 @@ using FunctionTan = FunctionMathUnary>; REGISTER_FUNCTION(Tan) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/tanh.cpp b/src/Functions/tanh.cpp index bdefa5263d7..62755737f70 100644 --- a/src/Functions/tanh.cpp +++ b/src/Functions/tanh.cpp @@ -39,7 +39,7 @@ using FunctionTanh = FunctionMathUnary>; REGISTER_FUNCTION(Tanh) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/throwIf.cpp b/src/Functions/throwIf.cpp index c6ff5b9151f..becc6d2f772 100644 --- a/src/Functions/throwIf.cpp +++ b/src/Functions/throwIf.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/timestamp.cpp b/src/Functions/timestamp.cpp index 6f2bd2030d5..c2e10a2d220 100644 --- a/src/Functions/timestamp.cpp +++ b/src/Functions/timestamp.cpp @@ -187,7 +187,7 @@ If the second argument 'expr_time' is provided, it adds the specified time to th {"timestamp", "SELECT timestamp('2013-12-31 12:00:00')", "2013-12-31 12:00:00.000000"}, {"timestamp", "SELECT timestamp('2013-12-31 12:00:00', '12:00:00.11')", "2014-01-01 00:00:00.110000"}, }, - .categories{"DateTime"}}, FunctionFactory::CaseInsensitive); + .categories{"DateTime"}}, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/toCustomWeek.cpp b/src/Functions/toCustomWeek.cpp index 98e7aaf1d6b..61c0767654e 100644 --- a/src/Functions/toCustomWeek.cpp +++ b/src/Functions/toCustomWeek.cpp @@ -21,8 +21,8 @@ REGISTER_FUNCTION(ToCustomWeek) factory.registerFunction(); /// Compatibility aliases for mysql. - factory.registerAlias("week", "toWeek", FunctionFactory::CaseInsensitive); - factory.registerAlias("yearweek", "toYearWeek", FunctionFactory::CaseInsensitive); + factory.registerAlias("week", "toWeek", FunctionFactory::Case::Insensitive); + factory.registerAlias("yearweek", "toYearWeek", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/toDayOfMonth.cpp b/src/Functions/toDayOfMonth.cpp index c20b0b75797..93013c3528b 100644 --- a/src/Functions/toDayOfMonth.cpp +++ b/src/Functions/toDayOfMonth.cpp @@ -14,8 +14,8 @@ REGISTER_FUNCTION(ToDayOfMonth) factory.registerFunction(); /// MySQL compatibility alias. - factory.registerAlias("DAY", "toDayOfMonth", FunctionFactory::CaseInsensitive); - factory.registerAlias("DAYOFMONTH", "toDayOfMonth", FunctionFactory::CaseInsensitive); + factory.registerAlias("DAY", "toDayOfMonth", FunctionFactory::Case::Insensitive); + factory.registerAlias("DAYOFMONTH", "toDayOfMonth", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/toDayOfWeek.cpp b/src/Functions/toDayOfWeek.cpp index dc508d70814..d1f55bbddab 100644 --- a/src/Functions/toDayOfWeek.cpp +++ b/src/Functions/toDayOfWeek.cpp @@ -13,7 +13,7 @@ REGISTER_FUNCTION(ToDayOfWeek) factory.registerFunction(); /// MySQL compatibility alias. 
- factory.registerAlias("DAYOFWEEK", "toDayOfWeek", FunctionFactory::CaseInsensitive); + factory.registerAlias("DAYOFWEEK", "toDayOfWeek", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/toDayOfYear.cpp b/src/Functions/toDayOfYear.cpp index 0cbafd6275a..9a27c41b0ed 100644 --- a/src/Functions/toDayOfYear.cpp +++ b/src/Functions/toDayOfYear.cpp @@ -14,7 +14,7 @@ REGISTER_FUNCTION(ToDayOfYear) factory.registerFunction(); /// MySQL compatibility alias. - factory.registerAlias("DAYOFYEAR", "toDayOfYear", FunctionFactory::CaseInsensitive); + factory.registerAlias("DAYOFYEAR", "toDayOfYear", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/toDaysSinceYearZero.cpp b/src/Functions/toDaysSinceYearZero.cpp index f6239b2900b..b5c053a11b3 100644 --- a/src/Functions/toDaysSinceYearZero.cpp +++ b/src/Functions/toDaysSinceYearZero.cpp @@ -20,7 +20,7 @@ The calculation is the same as in MySQL's TO_DAYS() function. .categories{"Dates and Times"}}); /// MySQL compatibility alias. - factory.registerAlias("TO_DAYS", FunctionToDaysSinceYearZero::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("TO_DAYS", FunctionToDaysSinceYearZero::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/toDecimalString.cpp b/src/Functions/toDecimalString.cpp index 4ee664ad237..523948a5396 100644 --- a/src/Functions/toDecimalString.cpp +++ b/src/Functions/toDecimalString.cpp @@ -273,7 +273,7 @@ second argument is the desired number of digits in fractional part. Returns Stri )", .examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)", ""}}, .categories{"String"} - }, FunctionFactory::CaseInsensitive); + }, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/toHour.cpp b/src/Functions/toHour.cpp index fc9ec657adf..bc122538661 100644 --- a/src/Functions/toHour.cpp +++ b/src/Functions/toHour.cpp @@ -14,7 +14,7 @@ REGISTER_FUNCTION(ToHour) factory.registerFunction(); /// MySQL compatibility alias. - factory.registerAlias("HOUR", "toHour", FunctionFactory::CaseInsensitive); + factory.registerAlias("HOUR", "toHour", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/toLastDayOfMonth.cpp b/src/Functions/toLastDayOfMonth.cpp index 9365880bfb8..004ae2718e7 100644 --- a/src/Functions/toLastDayOfMonth.cpp +++ b/src/Functions/toLastDayOfMonth.cpp @@ -13,7 +13,7 @@ REGISTER_FUNCTION(ToLastDayOfMonth) factory.registerFunction(); /// MySQL compatibility alias. - factory.registerAlias("LAST_DAY", "toLastDayOfMonth", FunctionFactory::CaseInsensitive); + factory.registerAlias("LAST_DAY", "toLastDayOfMonth", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/toMillisecond.cpp b/src/Functions/toMillisecond.cpp index aaef517c996..efa08c322a2 100644 --- a/src/Functions/toMillisecond.cpp +++ b/src/Functions/toMillisecond.cpp @@ -27,7 +27,7 @@ Returns the millisecond component (0-999) of a date with time. ); /// MySQL compatibility alias. - factory.registerAlias("MILLISECOND", "toMillisecond", FunctionFactory::CaseInsensitive); + factory.registerAlias("MILLISECOND", "toMillisecond", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/toMinute.cpp b/src/Functions/toMinute.cpp index 162ecb282df..291da33d2e8 100644 --- a/src/Functions/toMinute.cpp +++ b/src/Functions/toMinute.cpp @@ -14,7 +14,7 @@ REGISTER_FUNCTION(ToMinute) factory.registerFunction(); /// MySQL compatibility alias. 
- factory.registerAlias("MINUTE", "toMinute", FunctionFactory::CaseInsensitive); + factory.registerAlias("MINUTE", "toMinute", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/toMonth.cpp b/src/Functions/toMonth.cpp index 422f21e7df8..3ef73bf1be3 100644 --- a/src/Functions/toMonth.cpp +++ b/src/Functions/toMonth.cpp @@ -13,7 +13,7 @@ REGISTER_FUNCTION(ToMonth) { factory.registerFunction(); /// MySQL compatibility alias. - factory.registerAlias("MONTH", "toMonth", FunctionFactory::CaseInsensitive); + factory.registerAlias("MONTH", "toMonth", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/toQuarter.cpp b/src/Functions/toQuarter.cpp index 3c301095ff2..2e6d4fa93de 100644 --- a/src/Functions/toQuarter.cpp +++ b/src/Functions/toQuarter.cpp @@ -13,7 +13,7 @@ REGISTER_FUNCTION(ToQuarter) { factory.registerFunction(); /// MySQL compatibility alias. - factory.registerAlias("QUARTER", "toQuarter", FunctionFactory::CaseInsensitive); + factory.registerAlias("QUARTER", "toQuarter", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/toSecond.cpp b/src/Functions/toSecond.cpp index 372097fd488..1ad3b46fbd7 100644 --- a/src/Functions/toSecond.cpp +++ b/src/Functions/toSecond.cpp @@ -14,7 +14,7 @@ REGISTER_FUNCTION(ToSecond) factory.registerFunction(); /// MySQL compatibility alias. - factory.registerAlias("SECOND", "toSecond", FunctionFactory::CaseInsensitive); + factory.registerAlias("SECOND", "toSecond", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/toTimezone.cpp b/src/Functions/toTimezone.cpp index a0d90351898..cbcdf9bc772 100644 --- a/src/Functions/toTimezone.cpp +++ b/src/Functions/toTimezone.cpp @@ -9,6 +9,7 @@ #include #include +#include namespace DB { @@ -88,7 +89,7 @@ public: size_t getNumberOfArguments() const override { return 2; } static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique(context); } explicit ToTimeZoneOverloadResolver(ContextPtr context) - : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) {} DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override diff --git a/src/Functions/toTypeName.cpp b/src/Functions/toTypeName.cpp index 63bcc58d65e..abe02148d4a 100644 --- a/src/Functions/toTypeName.cpp +++ b/src/Functions/toTypeName.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include diff --git a/src/Functions/toYear.cpp b/src/Functions/toYear.cpp index 75479adb82c..0d2c8136337 100644 --- a/src/Functions/toYear.cpp +++ b/src/Functions/toYear.cpp @@ -14,7 +14,7 @@ REGISTER_FUNCTION(ToYear) factory.registerFunction(); /// MySQL compatibility alias. 
- factory.registerAlias("YEAR", "toYear", FunctionFactory::CaseInsensitive); + factory.registerAlias("YEAR", "toYear", FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/today.cpp b/src/Functions/today.cpp index 356660fa7b5..88eddc9b60e 100644 --- a/src/Functions/today.cpp +++ b/src/Functions/today.cpp @@ -84,8 +84,8 @@ public: REGISTER_FUNCTION(Today) { factory.registerFunction(); - factory.registerAlias("current_date", TodayOverloadResolver::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("curdate", TodayOverloadResolver::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("current_date", TodayOverloadResolver::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("curdate", TodayOverloadResolver::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 0dbc9946710..68500779f93 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -138,8 +138,7 @@ namespace } } - ColumnPtr executeImpl( - const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { std::call_once(once, [&] { initialize(arguments, result_type); }); diff --git a/src/Functions/tuple.cpp b/src/Functions/tuple.cpp index 71f50abba6c..84772f79d08 100644 --- a/src/Functions/tuple.cpp +++ b/src/Functions/tuple.cpp @@ -1,8 +1,15 @@ #include +#include + namespace DB { +FunctionPtr FunctionTuple::create(DB::ContextPtr context) +{ + return std::make_shared(context->getSettingsRef().enable_named_columns_in_function_tuple); +} + REGISTER_FUNCTION(Tuple) { factory.registerFunction(FunctionDocumentation{ diff --git a/src/Functions/tuple.h b/src/Functions/tuple.h index 94529d86861..03ba7ec9ef2 100644 --- a/src/Functions/tuple.h +++ b/src/Functions/tuple.h @@ -21,11 +21,7 @@ class FunctionTuple : public IFunction public: static constexpr auto name = "tuple"; - /// maybe_unused: false-positive - [[maybe_unused]] static FunctionPtr create(ContextPtr context) - { - return std::make_shared(context->getSettingsRef().enable_named_columns_in_function_tuple); - } + static FunctionPtr create(ContextPtr context); explicit FunctionTuple(bool enable_named_columns_ = false) : enable_named_columns(enable_named_columns_) { } diff --git a/src/Functions/upper.cpp b/src/Functions/upper.cpp index 3e1c7b1d800..5af0f059e3f 100644 --- a/src/Functions/upper.cpp +++ b/src/Functions/upper.cpp @@ -18,8 +18,8 @@ using FunctionUpper = FunctionStringToString, NameUpper REGISTER_FUNCTION(Upper) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); - factory.registerAlias("ucase", FunctionUpper::name, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::Case::Insensitive); + factory.registerAlias("ucase", FunctionUpper::name, FunctionFactory::Case::Insensitive); } } diff --git a/src/Functions/vectorFunctions.cpp b/src/Functions/vectorFunctions.cpp index 7a2598f5f4b..5e23493c86d 100644 --- a/src/Functions/vectorFunctions.cpp +++ b/src/Functions/vectorFunctions.cpp @@ -1576,9 +1576,9 @@ using TupleOrArrayFunctionCosineDistance = TupleOrArrayFunction(); - factory.registerAlias("vectorSum", FunctionTuplePlus::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("vectorSum", FunctionTuplePlus::name, FunctionFactory::Case::Insensitive); factory.registerFunction(); - 
factory.registerAlias("vectorDifference", FunctionTupleMinus::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("vectorDifference", FunctionTupleMinus::name, FunctionFactory::Case::Insensitive); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); @@ -1652,7 +1652,7 @@ If the types of the first interval (or the interval in the tuple) and the second factory.registerFunction(); factory.registerFunction(); - factory.registerAlias("scalarProduct", TupleOrArrayFunctionDotProduct::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("scalarProduct", TupleOrArrayFunctionDotProduct::name, FunctionFactory::Case::Insensitive); factory.registerFunction(); factory.registerFunction(); @@ -1660,11 +1660,11 @@ If the types of the first interval (or the interval in the tuple) and the second factory.registerFunction(); factory.registerFunction(); - factory.registerAlias("normL1", TupleOrArrayFunctionL1Norm::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("normL2", TupleOrArrayFunctionL2Norm::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("normL2Squared", TupleOrArrayFunctionL2SquaredNorm::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("normLinf", TupleOrArrayFunctionLinfNorm::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("normLp", FunctionLpNorm::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("normL1", TupleOrArrayFunctionL1Norm::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("normL2", TupleOrArrayFunctionL2Norm::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("normL2Squared", TupleOrArrayFunctionL2SquaredNorm::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("normLinf", TupleOrArrayFunctionLinfNorm::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("normLp", FunctionLpNorm::name, FunctionFactory::Case::Insensitive); factory.registerFunction(); factory.registerFunction(); @@ -1672,21 +1672,21 @@ If the types of the first interval (or the interval in the tuple) and the second factory.registerFunction(); factory.registerFunction(); - factory.registerAlias("distanceL1", FunctionL1Distance::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("distanceL2", FunctionL2Distance::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("distanceL2Squared", FunctionL2SquaredDistance::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("distanceLinf", FunctionLinfDistance::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("distanceLp", FunctionLpDistance::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("distanceL1", FunctionL1Distance::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("distanceL2", FunctionL2Distance::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("distanceL2Squared", FunctionL2SquaredDistance::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("distanceLinf", FunctionLinfDistance::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("distanceLp", FunctionLpDistance::name, FunctionFactory::Case::Insensitive); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); - factory.registerAlias("normalizeL1", FunctionL1Normalize::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("normalizeL2", FunctionL2Normalize::name, FunctionFactory::CaseInsensitive); - 
factory.registerAlias("normalizeLinf", FunctionLinfNormalize::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("normalizeLp", FunctionLpNormalize::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("normalizeL1", FunctionL1Normalize::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("normalizeL2", FunctionL2Normalize::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("normalizeLinf", FunctionLinfNormalize::name, FunctionFactory::Case::Insensitive); + factory.registerAlias("normalizeLp", FunctionLpNormalize::name, FunctionFactory::Case::Insensitive); factory.registerFunction(); } diff --git a/src/Functions/visibleWidth.cpp b/src/Functions/visibleWidth.cpp index 9a3edd9fbec..ebd4a1ff713 100644 --- a/src/Functions/visibleWidth.cpp +++ b/src/Functions/visibleWidth.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include diff --git a/src/Functions/widthBucket.cpp b/src/Functions/widthBucket.cpp index 62ed460ca9d..d007cc968f0 100644 --- a/src/Functions/widthBucket.cpp +++ b/src/Functions/widthBucket.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,7 @@ #include #include #include +#include #include #include @@ -285,7 +287,7 @@ Result: .categories{"Mathematical"}, }); - factory.registerAlias("width_bucket", "widthBucket", FunctionFactory::CaseInsensitive); + factory.registerAlias("width_bucket", "widthBucket", FunctionFactory::Case::Insensitive); } } diff --git a/src/IO/ConnectionTimeouts.cpp b/src/IO/ConnectionTimeouts.cpp index da6214ae477..662a2c067cf 100644 --- a/src/IO/ConnectionTimeouts.cpp +++ b/src/IO/ConnectionTimeouts.cpp @@ -1,7 +1,10 @@ +#include +#include #include -#include #include +#include + namespace DB { diff --git a/src/IO/ConnectionTimeouts.h b/src/IO/ConnectionTimeouts.h index b86ec44d21c..b84db46d656 100644 --- a/src/IO/ConnectionTimeouts.h +++ b/src/IO/ConnectionTimeouts.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include @@ -10,6 +9,7 @@ namespace DB { +struct ServerSettings; struct Settings; #define APPLY_FOR_ALL_CONNECTION_TIMEOUT_MEMBERS(M) \ diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 9e001232e65..bf97e959ee0 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -25,8 +25,6 @@ namespace ProfileEvents extern const Event ReadBufferFromS3InitMicroseconds; extern const Event ReadBufferFromS3Bytes; extern const Event ReadBufferFromS3RequestsErrors; - extern const Event ReadBufferFromS3ResetSessions; - extern const Event ReadBufferFromS3PreservedSessions; extern const Event ReadBufferSeekCancelConnection; extern const Event S3GetObject; extern const Event DiskS3GetObject; diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 55441cfb86b..6e659c82def 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -23,10 +23,11 @@ #include #include - #include #include +#include + #include @@ -824,6 +825,17 @@ void Client::updateURIForBucket(const std::string & bucket, S3::URI new_uri) con cache->uri_for_bucket_cache.emplace(bucket, std::move(new_uri)); } +ClientCache::ClientCache(const ClientCache & other) +{ + { + std::lock_guard lock(other.region_cache_mutex); + region_for_bucket_cache = other.region_for_bucket_cache; + } + { + std::lock_guard lock(other.uri_cache_mutex); + uri_for_bucket_cache = other.uri_for_bucket_cache; + } +} void ClientCache::clearCache() { diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index 2fd0a9cd2d1..de554e75d67 100644 --- a/src/IO/S3/Client.h 
+++ b/src/IO/S3/Client.h
@@ -54,10 +54,7 @@ struct ClientCache
 {
     ClientCache() = default;
 
-    ClientCache(const ClientCache & other)
-        : region_for_bucket_cache(other.region_for_bucket_cache)
-        , uri_for_bucket_cache(other.uri_for_bucket_cache)
-    {}
+    ClientCache(const ClientCache & other);
 
     ClientCache(ClientCache && other) = delete;
 
@@ -66,11 +63,11 @@ struct ClientCache
 
     void clearCache();
 
-    std::mutex region_cache_mutex;
-    std::unordered_map region_for_bucket_cache;
+    mutable std::mutex region_cache_mutex;
+    std::unordered_map region_for_bucket_cache TSA_GUARDED_BY(region_cache_mutex);
 
-    std::mutex uri_cache_mutex;
-    std::unordered_map uri_for_bucket_cache;
+    mutable std::mutex uri_cache_mutex;
+    std::unordered_map uri_for_bucket_cache TSA_GUARDED_BY(uri_cache_mutex);
 };
 
 class ClientCacheRegistry
@@ -89,7 +86,7 @@ private:
     ClientCacheRegistry() = default;
 
     std::mutex clients_mutex;
-    std::unordered_map> client_caches;
+    std::unordered_map> client_caches TSA_GUARDED_BY(clients_mutex);
 };
 
 bool isS3ExpressEndpoint(const std::string & endpoint);
@@ -162,7 +159,7 @@ public:
     class RetryStrategy : public Aws::Client::RetryStrategy
     {
     public:
-        explicit RetryStrategy(uint32_t maxRetries_ = 10, uint32_t scaleFactor_ = 25, uint32_t maxDelayMs_ = 90000);
+        explicit RetryStrategy(uint32_t maxRetries_ = 10, uint32_t scaleFactor_ = 25, uint32_t maxDelayMs_ = 5000);
 
         /// NOLINTNEXTLINE(google-runtime-int)
         bool ShouldRetry(const Aws::Client::AWSError& error, long attemptedRetries) const override;
diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp
index 0b3e5e50f3d..361b5a707fd 100644
--- a/src/IO/S3/copyS3File.cpp
+++ b/src/IO/S3/copyS3File.cpp
@@ -290,9 +290,9 @@ namespace
         if (!total_size)
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Chosen multipart upload for an empty file.
This must not happen"); - auto max_part_number = request_settings.max_part_number; - auto min_upload_part_size = request_settings.min_upload_part_size; - auto max_upload_part_size = request_settings.max_upload_part_size; + UInt64 max_part_number = request_settings.max_part_number; + UInt64 min_upload_part_size = request_settings.min_upload_part_size; + UInt64 max_upload_part_size = request_settings.max_upload_part_size; if (!max_part_number) throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_part_number must not be 0"); diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 490bf8c2d0c..a794cdbcf05 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "config.h" diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 2dca08871d3..fcfd0cfffae 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -30,6 +30,8 @@ namespace ErrorCodes extern const int S3_ERROR; } +struct Settings; + class S3Exception : public Exception { public: diff --git a/src/IO/S3Settings.cpp b/src/IO/S3Settings.cpp index a5a50c873cb..f99e8c0105b 100644 --- a/src/IO/S3Settings.cpp +++ b/src/IO/S3Settings.cpp @@ -1,9 +1,11 @@ #include +#include #include -#include #include +#include + namespace DB { diff --git a/src/IO/tests/gtest_memory_resize.cpp b/src/IO/tests/gtest_memory_resize.cpp index d760a948075..c3b34c352b2 100644 --- a/src/IO/tests/gtest_memory_resize.cpp +++ b/src/IO/tests/gtest_memory_resize.cpp @@ -134,7 +134,7 @@ TEST(MemoryResizeTest, SmallInitAndBigResizeOverflowWhenPadding) ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - PADDING_FOR_SIMD); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR); ASSERT_TRUE(memory.m_data); // state is intact after exception ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); @@ -158,7 +158,7 @@ TEST(MemoryResizeTest, SmallInitAndBigResizeOverflowWhenPadding) ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR); ASSERT_TRUE(memory.m_data); // state is intact after exception ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); @@ -197,7 +197,7 @@ TEST(MemoryResizeTest, BigInitAndSmallResizeOverflowWhenPadding) , ErrorCodes::ARGUMENT_OUT_OF_BOUND); } -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD { EXPECT_THROW_ERROR_CODE( { diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index 3c1af6538ad..b53a8b58023 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -917,8 +917,8 @@ TEST_P(SyncAsync, ExceptionOnUploadPart) { TEST_F(WBS3Test, PrefinalizeCalledMultipleTimes) { -#ifdef ABORT_ON_LOGICAL_ERROR - GTEST_SKIP() << "this test trigger LOGICAL_ERROR, runs only if ABORT_ON_LOGICAL_ERROR is not defined"; +#ifdef DEBUG_OR_SANITIZER_BUILD + GTEST_SKIP() << "this test trigger LOGICAL_ERROR, runs only if DEBUG_OR_SANITIZER_BUILD is not defined"; #else EXPECT_THROW({ try { diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 
32c51b745c7..855aa36b159 100644
--- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp
+++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp
@@ -114,6 +114,34 @@ namespace
         else if (query.grantees)
             user.grantees = *query.grantees;
     }
+
+    time_t getValidUntilFromAST(ASTPtr valid_until, ContextPtr context)
+    {
+        if (context)
+            valid_until = evaluateConstantExpressionAsLiteral(valid_until, context);
+
+        const String valid_until_str = checkAndGetLiteralArgument(valid_until, "valid_until");
+
+        if (valid_until_str == "infinity")
+            return 0;
+
+        time_t time = 0;
+        ReadBufferFromString in(valid_until_str);
+
+        if (context)
+        {
+            const auto & time_zone = DateLUT::instance("");
+            const auto & utc_time_zone = DateLUT::instance("UTC");
+
+            parseDateTimeBestEffort(time, in, time_zone, utc_time_zone);
+        }
+        else
+        {
+            readDateTimeText(time, in);
+        }
+
+        return time;
+    }
 }
 
 BlockIO InterpreterCreateUserQuery::execute()
@@ -134,23 +162,7 @@ BlockIO InterpreterCreateUserQuery::execute()
 
     std::optional valid_until;
     if (query.valid_until)
-    {
-        const ASTPtr valid_until_literal = evaluateConstantExpressionAsLiteral(query.valid_until, getContext());
-        const String valid_until_str = checkAndGetLiteralArgument(valid_until_literal, "valid_until");
-
-        time_t time = 0;
-
-        if (valid_until_str != "infinity")
-        {
-            const auto & time_zone = DateLUT::instance("");
-            const auto & utc_time_zone = DateLUT::instance("UTC");
-
-            ReadBufferFromString in(valid_until_str);
-            parseDateTimeBestEffort(time, in, time_zone, utc_time_zone);
-        }
-
-        valid_until = time;
-    }
+        valid_until = getValidUntilFromAST(query.valid_until, getContext());
 
     std::optional default_roles_from_query;
     if (query.default_roles)
@@ -259,7 +271,11 @@ void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreat
     if (query.auth_data)
         auth_data = AuthenticationData::fromAST(*query.auth_data, {}, !query.attach);
 
-    updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, {}, allow_no_password, allow_plaintext_password, true);
+    std::optional valid_until;
+    if (query.valid_until)
+        valid_until = getValidUntilFromAST(query.valid_until, {});
+
+    updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, valid_until, allow_no_password, allow_plaintext_password, true);
 }
 
 void registerInterpreterCreateUserQuery(InterpreterFactory & factory)
diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp
index 9efb1d89a47..a874067e43c 100644
--- a/src/Interpreters/ActionsVisitor.cpp
+++ b/src/Interpreters/ActionsVisitor.cpp
@@ -8,6 +8,7 @@
 #include
 #include
+#include
 #include
 #include
@@ -442,7 +443,7 @@ FutureSetPtr makeExplicitSet(
     else
         block = createBlockForSet(left_arg_type, right_arg, set_element_types, context);
 
-    return prepared_sets.addFromTuple(set_key, block, context->getSettings());
+    return prepared_sets.addFromTuple(set_key, block, context->getSettingsRef());
 }
 
 class ScopeStack::Index
diff --git a/src/Interpreters/ArrayJoinAction.cpp b/src/Interpreters/ArrayJoinAction.cpp
index 54ff53764e7..df7a0b48057 100644
--- a/src/Interpreters/ArrayJoinAction.cpp
+++ b/src/Interpreters/ArrayJoinAction.cpp
@@ -1,4 +1,5 @@
 #include
+#include
 #include
 #include
 #include
diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp
index 56055e7044c..62777524c2a 100644
--- a/src/Interpreters/AsynchronousInsertQueue.cpp
+++ b/src/Interpreters/AsynchronousInsertQueue.cpp
@@ -301,7 +301,13 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query,
const auto & insert_query = query->as(); insert_query.async_insert_flush = true; - InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns); + InterpreterInsertQuery interpreter( + query, + query_context, + query_context->getSettingsRef().insert_allow_materialized_columns, + /* no_squash */ false, + /* no_destination */ false, + /* async_insert */ false); auto table = interpreter.getTable(insert_query); auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); @@ -784,7 +790,12 @@ try try { interpreter = std::make_unique( - key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); + key.query, + insert_context, + key.settings.insert_allow_materialized_columns, + false, + false, + true); pipeline = interpreter->execute().pipeline; chassert(pipeline.pushing()); @@ -1003,7 +1014,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( } Chunk chunk(executor.getResultColumns(), total_rows); - chunk.setChunkInfo(std::move(chunk_info)); + chunk.getChunkInfos().add(std::move(chunk_info)); return chunk; } @@ -1055,7 +1066,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries( } Chunk chunk(std::move(result_columns), total_rows); - chunk.setChunkInfo(std::move(chunk_info)); + chunk.getChunkInfos().add(std::move(chunk_info)); return chunk; } diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index a3848fa3a75..a88c0de2cfe 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1007,7 +1007,7 @@ void FileCache::freeSpaceRatioKeepingThreadFunc() limits_satisfied = main_priority->collectCandidatesForEviction( desired_size, desired_elements_num, keep_up_free_space_remove_batch, stat, eviction_candidates, lock); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD /// Let's make sure that we correctly processed the limits. 
if (limits_satisfied && eviction_candidates.size() < keep_up_free_space_remove_batch) { @@ -1110,7 +1110,7 @@ void FileCache::removeAllReleasable(const UserID & user_id) { assertInitialized(); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD assertCacheCorrectness(); #endif @@ -1226,7 +1226,7 @@ void FileCache::loadMetadataImpl() if (first_exception) std::rethrow_exception(first_exception); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD assertCacheCorrectness(); #endif } @@ -1393,7 +1393,7 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir) FileCache::~FileCache() { deactivateBackgroundOperations(); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD assertCacheCorrectness(); #endif } diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 838ca0b491e..1664a91b694 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -67,7 +67,7 @@ FileSegment::FileSegment( , key_metadata(key_metadata_) , queue_iterator(queue_iterator_) , cache(cache_) -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD , log(getLogger(fmt::format("FileSegment({}) : {}", key_.toString(), range().toString()))) #else , log(getLogger("FileSegment")) @@ -385,9 +385,9 @@ void FileSegment::write(char * from, size_t size, size_t offset_in_file) try { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD /// This mutex is only needed to have a valid assertion in assertCacheCorrectness(), - /// which is only executed in debug/sanitizer builds (under ABORT_ON_LOGICAL_ERROR). + /// which is only executed in debug/sanitizer builds (under DEBUG_OR_SANITIZER_BUILD). std::lock_guard lock(write_mutex); #endif diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 1d23278a255..7e4b76d3cc6 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -963,7 +963,7 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( } else if (!can_be_broken) { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); #else LOG_WARNING(key_metadata->logger(), "Expected path {} to exist, while removing {}:{}", diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index dd038948adf..e6ebf6ad50c 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -12,6 +12,8 @@ #include #include +#include + namespace DB { diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index ce1efb61cc0..39fdef23baa 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -95,7 +95,7 @@ void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS) { writeVarUInt(static_cast(collaborate_with_initiator), out); - writeVarUInt(count_participating_replicas, out); + writeVarUInt(obsolete_count_participating_replicas, out); writeVarUInt(number_of_current_replica, out); } } @@ -185,7 +185,7 @@ void ClientInfo::read(ReadBuffer & in, UInt64 client_protocol_revision) UInt64 value; readVarUInt(value, in); collaborate_with_initiator = static_cast(value); - readVarUInt(count_participating_replicas, in); + readVarUInt(obsolete_count_participating_replicas, in); 
readVarUInt(number_of_current_replica, in); } } diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index 3054667e264..ca32b4c5cfa 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -127,7 +127,7 @@ public: /// For parallel processing on replicas bool collaborate_with_initiator{false}; - UInt64 count_participating_replicas{0}; + UInt64 obsolete_count_participating_replicas{0}; UInt64 number_of_current_replica{0}; enum class BackgroundOperationType : uint8_t diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 94a02eacdaa..e35d31d2350 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 4493a9f4dbd..ac940c62a1a 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -310,7 +310,7 @@ IColumn::Selector ConcurrentHashJoin::selectDispatchBlock(const Strings & key_co { const auto & key_col = from_block.getByName(key_name).column->convertToFullColumnIfConst(); const auto & key_col_no_lc = recursiveRemoveLowCardinality(recursiveRemoveSparse(key_col)); - key_col_no_lc->updateWeakHash32(hash); + hash.update(key_col_no_lc->getWeakHash32()); } return hashToSelector(hash, num_shards); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index f2626696492..94bcb88ed53 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +51,6 @@ #include #include #include -#include #include #include #include @@ -824,8 +824,69 @@ void ContextSharedMutex::lockSharedImpl() ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); } -ContextData::ContextData() = default; -ContextData::ContextData(const ContextData &) = default; +ContextData::ContextData() +{ + settings = std::make_unique(); +} +ContextData::ContextData(const ContextData &o) : + shared(o.shared), + client_info(o.client_info), + external_tables_initializer_callback(o.external_tables_initializer_callback), + input_initializer_callback(o.input_initializer_callback), + input_blocks_reader(o.input_blocks_reader), + user_id(o.user_id), + current_roles(o.current_roles), + settings_constraints_and_current_profiles(o.settings_constraints_and_current_profiles), + access(o.access), + need_recalculate_access(o.need_recalculate_access), + current_database(o.current_database), + settings(std::make_unique(*o.settings)), + progress_callback(o.progress_callback), + file_progress_callback(o.file_progress_callback), + process_list_elem(o.process_list_elem), + has_process_list_elem(o.has_process_list_elem), + insertion_table_info(o.insertion_table_info), + is_distributed(o.is_distributed), + default_format(o.default_format), + insert_format(o.insert_format), + external_tables_mapping(o.external_tables_mapping), + scalars(o.scalars), + special_scalars(o.special_scalars), + next_task_callback(o.next_task_callback), + merge_tree_read_task_callback(o.merge_tree_read_task_callback), + merge_tree_all_ranges_callback(o.merge_tree_all_ranges_callback), + parallel_replicas_group_uuid(o.parallel_replicas_group_uuid), + client_protocol_version(o.client_protocol_version), + 
query_access_info(std::make_shared(*o.query_access_info)), + query_factories_info(o.query_factories_info), + query_privileges_info(o.query_privileges_info), + async_read_counters(o.async_read_counters), + view_source(o.view_source), + table_function_results(o.table_function_results), + query_context(o.query_context), + session_context(o.session_context), + global_context(o.global_context), + buffer_context(o.buffer_context), + is_internal_query(o.is_internal_query), + temp_data_on_disk(o.temp_data_on_disk), + classifier(o.classifier), + prepared_sets_cache(o.prepared_sets_cache), + offset_parallel_replicas_enabled(o.offset_parallel_replicas_enabled), + kitchen_sink(o.kitchen_sink), + part_uuids(o.part_uuids), + ignored_part_uuids(o.ignored_part_uuids), + query_parameters(o.query_parameters), + host_context(o.host_context), + metadata_transaction(o.metadata_transaction), + merge_tree_transaction(o.merge_tree_transaction), + merge_tree_transaction_holder(o.merge_tree_transaction_holder), + remote_read_query_throttler(o.remote_read_query_throttler), + remote_write_query_throttler(o.remote_write_query_throttler), + local_read_query_throttler(o.local_read_query_throttler), + local_write_query_throttler(o.local_write_query_throttler), + backups_query_throttler(o.backups_query_throttler) +{ +} Context::Context() = default; Context::Context(const Context & rhs) : ContextData(rhs), std::enable_shared_from_this(rhs) {} @@ -865,7 +926,6 @@ ContextMutablePtr Context::createCopy(const ContextPtr & other) { SharedLockGuard lock(other->mutex); auto new_context = std::shared_ptr(new Context(*other)); - new_context->query_access_info = std::make_shared(*other->query_access_info); return new_context; } @@ -971,7 +1031,7 @@ Strings Context::getWarnings() const } /// Make setting's name ordered std::set obsolete_settings; - for (const auto & setting : settings) + for (const auto & setting : *settings) { if (setting.isValueChanged() && setting.isObsolete()) obsolete_settings.emplace(setting.getName()); @@ -1486,7 +1546,7 @@ std::shared_ptr Context::getAccess() const /// If setUserID() was never called then this must be the global context with the full access. bool full_access = !user_id; - return ContextAccessParams{user_id, full_access, /* use_default_roles= */ false, current_roles, settings, current_database, client_info}; + return ContextAccessParams{user_id, full_access, /* use_default_roles= */ false, current_roles, *settings, current_database, client_info}; }; /// Check if the current access rights are still valid, otherwise get parameters for recalculating access rights. 
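The hand-written ContextData copy constructor above exists because `settings` is now owned through a `std::unique_ptr`, so the previously `= default` copy constructor is no longer viable: every member must be copied explicitly, including a deep copy of the settings object (which in turn lets `createCopy` drop its manual `query_access_info` copy in the follow-up hunk). A minimal sketch of that ownership pattern, with illustrative type names rather than the real ClickHouse classes:

```cpp
#include <iostream>
#include <memory>
#include <string>

struct HeavySettings            // stands in for the large Settings struct
{
    std::string workload = "default";
    size_t max_threads = 8;
};

struct ContextLike
{
    std::unique_ptr<HeavySettings> settings;

    ContextLike() : settings(std::make_unique<HeavySettings>()) {}

    // Deep copy: each copy owns an independent settings object,
    // as the ContextData copy constructor above does.
    ContextLike(const ContextLike & o) : settings(std::make_unique<HeavySettings>(*o.settings)) {}

    const HeavySettings & getSettingsRef() const { return *settings; }
};

int main()
{
    ContextLike global;
    ContextLike query_copy(global);        // independent settings object
    query_copy.settings->max_threads = 1;  // does not touch `global`
    std::cout << global.getSettingsRef().max_threads << '\n';  // prints 8
}
```

The payoff shows up in the Context.h hunk further below, where the full settings header is replaced by a forward declaration.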
@@ -1568,7 +1628,7 @@ void Context::setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_i checkSettingsConstraintsWithLock(profiles_info.settings, SettingSource::PROFILE); applySettingsChangesWithLock(profiles_info.settings, lock); settings_constraints_and_current_profiles = profiles_info.getConstraintsAndProfileIDs(settings_constraints_and_current_profiles); - contextSanityClampSettingsWithLock(*this, settings, lock); + contextSanityClampSettingsWithLock(*this, *settings, lock); } void Context::setCurrentProfile(const String & profile_name, bool check_constraints) @@ -2209,15 +2269,15 @@ bool Context::displaySecretsInShowAndSelect() const Settings Context::getSettings() const { SharedLockGuard lock(mutex); - return settings; + return *settings; } void Context::setSettings(const Settings & settings_) { std::lock_guard lock(mutex); - settings = settings_; + *settings = settings_; need_recalculate_access = true; - contextSanityClampSettings(*this, settings); + contextSanityClampSettings(*this, *settings); } void Context::setSettingWithLock(std::string_view name, const String & value, const std::lock_guard & lock) @@ -2227,10 +2287,10 @@ void Context::setSettingWithLock(std::string_view name, const String & value, co setCurrentProfileWithLock(value, true /*check_constraints*/, lock); return; } - settings.set(name, value); + settings->set(name, value); if (ContextAccessParams::dependsOnSettingName(name)) need_recalculate_access = true; - contextSanityClampSettingsWithLock(*this, settings, lock); + contextSanityClampSettingsWithLock(*this, *settings, lock); } void Context::setSettingWithLock(std::string_view name, const Field & value, const std::lock_guard & lock) @@ -2240,7 +2300,7 @@ void Context::setSettingWithLock(std::string_view name, const Field & value, con setCurrentProfileWithLock(value.safeGet(), true /*check_constraints*/, lock); return; } - settings.set(name, value); + settings->set(name, value); if (ContextAccessParams::dependsOnSettingName(name)) need_recalculate_access = true; } @@ -2250,7 +2310,7 @@ void Context::applySettingChangeWithLock(const SettingChange & change, const std try { setSettingWithLock(change.name, change.value, lock); - contextSanityClampSettingsWithLock(*this, settings, lock); + contextSanityClampSettingsWithLock(*this, *settings, lock); } catch (Exception & e) { @@ -2265,7 +2325,7 @@ void Context::applySettingsChangesWithLock(const SettingsChanges & changes, cons { for (const SettingChange & change : changes) applySettingChangeWithLock(change, lock); - applySettingsQuirks(settings); + applySettingsQuirks(*settings); } void Context::setSetting(std::string_view name, const String & value) @@ -2278,7 +2338,7 @@ void Context::setSetting(std::string_view name, const Field & value) { std::lock_guard lock(mutex); setSettingWithLock(name, value, lock); - contextSanityClampSettingsWithLock(*this, settings, lock); + contextSanityClampSettingsWithLock(*this, *settings, lock); } void Context::setServerSetting(std::string_view name, const Field & value) @@ -2311,37 +2371,37 @@ void Context::applySettingsChanges(const SettingsChanges & changes) void Context::checkSettingsConstraintsWithLock(const SettingsProfileElements & profile_elements, SettingSource source) { - getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, profile_elements, source); + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(*settings, profile_elements, source); if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == 
ApplicationType::SERVER) - doSettingsSanityCheckClamp(settings, getLogger("SettingsSanity")); + doSettingsSanityCheckClamp(*settings, getLogger("SettingsSanity")); } void Context::checkSettingsConstraintsWithLock(const SettingChange & change, SettingSource source) { - getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, change, source); + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(*settings, change, source); if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == ApplicationType::SERVER) - doSettingsSanityCheckClamp(settings, getLogger("SettingsSanity")); + doSettingsSanityCheckClamp(*settings, getLogger("SettingsSanity")); } void Context::checkSettingsConstraintsWithLock(const SettingsChanges & changes, SettingSource source) { - getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, changes, source); + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(*settings, changes, source); if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == ApplicationType::SERVER) - doSettingsSanityCheckClamp(settings, getLogger("SettingsSanity")); + doSettingsSanityCheckClamp(*settings, getLogger("SettingsSanity")); } void Context::checkSettingsConstraintsWithLock(SettingsChanges & changes, SettingSource source) { - getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, changes, source); + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(*settings, changes, source); if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == ApplicationType::SERVER) - doSettingsSanityCheckClamp(settings, getLogger("SettingsSanity")); + doSettingsSanityCheckClamp(*settings, getLogger("SettingsSanity")); } void Context::clampToSettingsConstraintsWithLock(SettingsChanges & changes, SettingSource source) { - getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.clamp(settings, changes, source); + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.clamp(*settings, changes, source); if (getApplicationType() == ApplicationType::LOCAL || getApplicationType() == ApplicationType::SERVER) - doSettingsSanityCheckClamp(settings, getLogger("SettingsSanity")); + doSettingsSanityCheckClamp(*settings, getLogger("SettingsSanity")); } void Context::checkMergeTreeSettingsConstraintsWithLock(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const @@ -2364,8 +2424,8 @@ void Context::checkSettingsConstraints(const SettingChange & change, SettingSour void Context::checkSettingsConstraints(const SettingsChanges & changes, SettingSource source) { SharedLockGuard lock(mutex); - getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, changes, source); - doSettingsSanityCheckClamp(settings, getLogger("SettingsSanity")); + getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(*settings, changes, source); + doSettingsSanityCheckClamp(*settings, getLogger("SettingsSanity")); } void Context::checkSettingsConstraints(SettingsChanges & changes, SettingSource source) @@ -2390,7 +2450,7 @@ void Context::resetSettingsToDefaultValue(const std::vector & names) { std::lock_guard lock(mutex); for (const String & name: names) - settings.setDefaultValue(name); + settings->setDefaultValue(name); } std::shared_ptr Context::getSettingsConstraintsAndCurrentProfilesWithLock() const @@ -2610,14 +2670,14 @@ void Context::makeQueryContextForMerge(const MergeTreeSettings & 
merge_tree_sett { makeQueryContext(); classifier.reset(); // It is assumed that there are no active queries running using this classifier, otherwise this will lead to crashes - settings.workload = merge_tree_settings.merge_workload.value.empty() ? getMergeWorkload() : merge_tree_settings.merge_workload; + settings->workload = merge_tree_settings.merge_workload.value.empty() ? getMergeWorkload() : merge_tree_settings.merge_workload; } void Context::makeQueryContextForMutate(const MergeTreeSettings & merge_tree_settings) { makeQueryContext(); classifier.reset(); // It is assumed that there are no active queries running using this classifier, otherwise this will lead to crashes - settings.workload = merge_tree_settings.mutation_workload.value.empty() ? getMutationWorkload() : merge_tree_settings.mutation_workload; + settings->workload = merge_tree_settings.mutation_workload.value.empty() ? getMutationWorkload() : merge_tree_settings.mutation_workload; } void Context::makeSessionContext() @@ -3900,7 +3960,7 @@ void Context::reloadClusterConfig() const } const auto & config = cluster_config ? *cluster_config : getConfigRef(); - auto new_clusters = std::make_shared(config, settings, getMacros()); + auto new_clusters = std::make_shared(config, *settings, getMacros()); { std::lock_guard lock(shared->clusters_mutex); @@ -3935,7 +3995,7 @@ std::shared_ptr Context::getClustersImpl(std::lock_guard & if (!shared->clusters) { const auto & config = shared->clusters_config ? *shared->clusters_config : getConfigRef(); - shared->clusters = std::make_shared(config, settings, getMacros()); + shared->clusters = std::make_shared(config, *settings, getMacros()); } return shared->clusters; @@ -3967,9 +4027,9 @@ void Context::setClustersConfig(const ConfigurationPtr & config, bool enable_dis shared->clusters_config = config; if (!shared->clusters) - shared->clusters = std::make_shared(*shared->clusters_config, settings, getMacros(), config_name); + shared->clusters = std::make_shared(*shared->clusters_config, *settings, getMacros(), config_name); else - shared->clusters->updateClusters(*shared->clusters_config, settings, config_name, old_clusters_config); + shared->clusters->updateClusters(*shared->clusters_config, *settings, config_name, old_clusters_config); ++shared->clusters_version; } @@ -4230,7 +4290,7 @@ std::shared_ptr Context::getBackupLog() const std::shared_ptr Context::getBlobStorageLog() const { - bool enable_blob_storage_log = settings.enable_blob_storage_log; + bool enable_blob_storage_log = settings->enable_blob_storage_log; if (hasQueryContext()) enable_blob_storage_log = getQueryContext()->getSettingsRef().enable_blob_storage_log; @@ -4671,8 +4731,8 @@ void Context::setDefaultProfiles(const Poco::Util::AbstractConfiguration & confi shared->system_profile_name = config.getString("system_profile", shared->default_profile_name); setCurrentProfile(shared->system_profile_name); - applySettingsQuirks(settings, getLogger("SettingsQuirks")); - doSettingsSanityCheckClamp(settings, getLogger("SettingsSanity")); + applySettingsQuirks(*settings, getLogger("SettingsQuirks")); + doSettingsSanityCheckClamp(*settings, getLogger("SettingsSanity")); shared->buffer_profile_name = config.getString("buffer_profile", shared->system_profile_name); buffer_context = Context::createCopy(shared_from_this()); @@ -4941,13 +5001,6 @@ void Context::setConnectionClientVersion(UInt64 client_version_major, UInt64 cli client_info.connection_tcp_protocol_version = client_tcp_protocol_version; } -void Context::setReplicaInfo(bool 
collaborate_with_initiator, size_t all_replicas_count, size_t number_of_current_replica) -{ - client_info.collaborate_with_initiator = collaborate_with_initiator; - client_info.count_participating_replicas = all_replicas_count; - client_info.number_of_current_replica = number_of_current_replica; -} - void Context::increaseDistributedDepth() { ++client_info.distributed_depth; @@ -5241,11 +5294,11 @@ AsynchronousInsertQueue * Context::tryGetAsynchronousInsertQueue() const void Context::setAsynchronousInsertQueue(const std::shared_ptr & ptr) { - AsynchronousInsertQueue::validateSettings(settings, getLogger("Context")); + AsynchronousInsertQueue::validateSettings(*settings, getLogger("Context")); SharedLockGuard lock(shared->mutex); - if (std::chrono::milliseconds(settings.async_insert_poll_timeout_ms) == std::chrono::milliseconds::zero()) + if (std::chrono::milliseconds(settings->async_insert_poll_timeout_ms) == std::chrono::milliseconds::zero()) throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting async_insert_poll_timeout_ms can't be zero"); shared->async_insert_queue = ptr; @@ -5392,69 +5445,69 @@ ReadSettings Context::getReadSettings() const { ReadSettings res; - std::string_view read_method_str = settings.local_filesystem_read_method.value; + std::string_view read_method_str = settings->local_filesystem_read_method.value; if (auto opt_method = magic_enum::enum_cast(read_method_str)) res.local_fs_method = *opt_method; else throw Exception(ErrorCodes::UNKNOWN_READ_METHOD, "Unknown read method '{}' for local filesystem", read_method_str); - read_method_str = settings.remote_filesystem_read_method.value; + read_method_str = settings->remote_filesystem_read_method.value; if (auto opt_method = magic_enum::enum_cast(read_method_str)) res.remote_fs_method = *opt_method; else throw Exception(ErrorCodes::UNKNOWN_READ_METHOD, "Unknown read method '{}' for remote filesystem", read_method_str); - res.local_fs_prefetch = settings.local_filesystem_read_prefetch; - res.remote_fs_prefetch = settings.remote_filesystem_read_prefetch; + res.local_fs_prefetch = settings->local_filesystem_read_prefetch; + res.remote_fs_prefetch = settings->remote_filesystem_read_prefetch; - res.load_marks_asynchronously = settings.load_marks_asynchronously; + res.load_marks_asynchronously = settings->load_marks_asynchronously; - res.enable_filesystem_read_prefetches_log = settings.enable_filesystem_read_prefetches_log; + res.enable_filesystem_read_prefetches_log = settings->enable_filesystem_read_prefetches_log; - res.remote_fs_read_max_backoff_ms = settings.remote_fs_read_max_backoff_ms; - res.remote_fs_read_backoff_max_tries = settings.remote_fs_read_backoff_max_tries; - res.enable_filesystem_cache = settings.enable_filesystem_cache; - res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; - res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; - res.filesystem_cache_segments_batch_size = settings.filesystem_cache_segments_batch_size; - res.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; + res.remote_fs_read_max_backoff_ms = settings->remote_fs_read_max_backoff_ms; + res.remote_fs_read_backoff_max_tries = settings->remote_fs_read_backoff_max_tries; + res.enable_filesystem_cache = settings->enable_filesystem_cache; + res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = 
settings->read_from_filesystem_cache_if_exists_otherwise_bypass_cache; + res.enable_filesystem_cache_log = settings->enable_filesystem_cache_log; + res.filesystem_cache_segments_batch_size = settings->filesystem_cache_segments_batch_size; + res.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = settings->filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; - res.filesystem_cache_max_download_size = settings.filesystem_cache_max_download_size; - res.skip_download_if_exceeds_query_cache = settings.skip_download_if_exceeds_query_cache; + res.filesystem_cache_max_download_size = settings->filesystem_cache_max_download_size; + res.skip_download_if_exceeds_query_cache = settings->skip_download_if_exceeds_query_cache; res.page_cache = getPageCache(); - res.use_page_cache_for_disks_without_file_cache = settings.use_page_cache_for_disks_without_file_cache; - res.read_from_page_cache_if_exists_otherwise_bypass_cache = settings.read_from_page_cache_if_exists_otherwise_bypass_cache; - res.page_cache_inject_eviction = settings.page_cache_inject_eviction; + res.use_page_cache_for_disks_without_file_cache = settings->use_page_cache_for_disks_without_file_cache; + res.read_from_page_cache_if_exists_otherwise_bypass_cache = settings->read_from_page_cache_if_exists_otherwise_bypass_cache; + res.page_cache_inject_eviction = settings->page_cache_inject_eviction; - res.remote_read_min_bytes_for_seek = settings.remote_read_min_bytes_for_seek; + res.remote_read_min_bytes_for_seek = settings->remote_read_min_bytes_for_seek; /// Zero read buffer will not make progress. - if (!settings.max_read_buffer_size) + if (!settings->max_read_buffer_size) { throw Exception(ErrorCodes::INVALID_SETTING_VALUE, - "Invalid value '{}' for max_read_buffer_size", settings.max_read_buffer_size); + "Invalid value '{}' for max_read_buffer_size", settings->max_read_buffer_size); } res.local_fs_buffer_size - = settings.max_read_buffer_size_local_fs ? settings.max_read_buffer_size_local_fs : settings.max_read_buffer_size; + = settings->max_read_buffer_size_local_fs ? settings->max_read_buffer_size_local_fs : settings->max_read_buffer_size; res.remote_fs_buffer_size - = settings.max_read_buffer_size_remote_fs ? settings.max_read_buffer_size_remote_fs : settings.max_read_buffer_size; - res.prefetch_buffer_size = settings.prefetch_buffer_size; - res.direct_io_threshold = settings.min_bytes_to_use_direct_io; - res.mmap_threshold = settings.min_bytes_to_use_mmap_io; - res.priority = Priority{settings.read_priority}; + = settings->max_read_buffer_size_remote_fs ? 
settings->max_read_buffer_size_remote_fs : settings->max_read_buffer_size; + res.prefetch_buffer_size = settings->prefetch_buffer_size; + res.direct_io_threshold = settings->min_bytes_to_use_direct_io; + res.mmap_threshold = settings->min_bytes_to_use_mmap_io; + res.priority = Priority{settings->read_priority}; res.remote_throttler = getRemoteReadThrottler(); res.local_throttler = getLocalReadThrottler(); - res.http_max_tries = settings.http_max_tries; - res.http_retry_initial_backoff_ms = settings.http_retry_initial_backoff_ms; - res.http_retry_max_backoff_ms = settings.http_retry_max_backoff_ms; - res.http_skip_not_found_url_for_globs = settings.http_skip_not_found_url_for_globs; - res.http_make_head_request = settings.http_make_head_request; + res.http_max_tries = settings->http_max_tries; + res.http_retry_initial_backoff_ms = settings->http_retry_initial_backoff_ms; + res.http_retry_max_backoff_ms = settings->http_retry_max_backoff_ms; + res.http_skip_not_found_url_for_globs = settings->http_skip_not_found_url_for_globs; + res.http_make_head_request = settings->http_make_head_request; res.mmap_cache = getMMappedFileCache().get(); @@ -5465,13 +5518,13 @@ WriteSettings Context::getWriteSettings() const { WriteSettings res; - res.enable_filesystem_cache_on_write_operations = settings.enable_filesystem_cache_on_write_operations; - res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; - res.throw_on_error_from_cache = settings.throw_on_error_from_cache_on_write_operations; - res.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; + res.enable_filesystem_cache_on_write_operations = settings->enable_filesystem_cache_on_write_operations; + res.enable_filesystem_cache_log = settings->enable_filesystem_cache_log; + res.throw_on_error_from_cache = settings->throw_on_error_from_cache_on_write_operations; + res.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = settings->filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; - res.s3_allow_parallel_part_upload = settings.s3_allow_parallel_part_upload; - res.azure_allow_parallel_part_upload = settings.azure_allow_parallel_part_upload; + res.s3_allow_parallel_part_upload = settings->s3_allow_parallel_part_upload; + res.azure_allow_parallel_part_upload = settings->azure_allow_parallel_part_upload; res.remote_throttler = getRemoteWriteThrottler(); res.local_throttler = getLocalWriteThrottler(); @@ -5519,7 +5572,7 @@ bool Context::canUseParallelReplicasOnFollower() const bool Context::canUseParallelReplicasCustomKey() const { - return settings.max_parallel_replicas > 1 && getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY; + return settings->max_parallel_replicas > 1 && getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY; } bool Context::canUseParallelReplicasCustomKeyForCluster(const Cluster & cluster) const @@ -5529,7 +5582,7 @@ bool Context::canUseParallelReplicasCustomKeyForCluster(const Cluster & cluster) bool Context::canUseOffsetParallelReplicas() const { - return offset_parallel_replicas_enabled && settings.max_parallel_replicas > 1 + return offset_parallel_replicas_enabled && settings->max_parallel_replicas > 1 && getParallelReplicasMode() != Context::ParallelReplicasMode::READ_TASKS; } @@ -5541,13 +5594,13 @@ void Context::disableOffsetParallelReplicas() ClusterPtr Context::getClusterForParallelReplicas() const { /// check cluster for parallel replicas - if 
(settings.cluster_for_parallel_replicas.value.empty()) + if (settings->cluster_for_parallel_replicas.value.empty()) throw Exception( ErrorCodes::CLUSTER_DOESNT_EXIST, "Reading in parallel from replicas is enabled but cluster to execute query is not provided. Please set " "'cluster_for_parallel_replicas' setting"); - return getCluster(settings.cluster_for_parallel_replicas); + return getCluster(settings->cluster_for_parallel_replicas); } void Context::setPreparedSetsCache(const PreparedSetsCachePtr & cache) @@ -5575,4 +5628,39 @@ const ServerSettings & Context::getServerSettings() const return shared->server_settings; } +uint64_t HTTPContext::getMaxHstsAge() const +{ + return context->getSettingsRef().hsts_max_age; +} + + uint64_t HTTPContext::getMaxUriSize() const +{ + return context->getSettingsRef().http_max_uri_size; +} + +uint64_t HTTPContext::getMaxFields() const +{ +return context->getSettingsRef().http_max_fields; +} + +uint64_t HTTPContext::getMaxFieldNameSize() const +{ +return context->getSettingsRef().http_max_field_name_size; +} + +uint64_t HTTPContext::getMaxFieldValueSize() const +{ +return context->getSettingsRef().http_max_field_value_size; +} + +Poco::Timespan HTTPContext::getReceiveTimeout() const +{ +return context->getSettingsRef().http_receive_timeout; +} + +Poco::Timespan HTTPContext::getSendTimeout() const +{ +return context->getSettingsRef().http_send_timeout; +} + } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 8c5492bcbc8..cb553d07513 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -11,9 +11,11 @@ #include #include #include -#include #include +#include #include +#include +#include #include #include #include @@ -129,6 +131,7 @@ class ShellCommand; class ICompressionCodec; class AccessControl; class GSSAcceptorContext; +struct Settings; struct SettingsConstraintsAndProfileIDs; class SettingsProfileElements; class RemoteHostFilter; @@ -276,7 +279,7 @@ protected: mutable std::shared_ptr access; mutable bool need_recalculate_access = true; String current_database; - Settings settings; /// Setting for query execution. + std::unique_ptr settings{}; /// Setting for query execution. using ProgressCallback = std::function; ProgressCallback progress_callback; /// Callback for tracking progress of query execution. @@ -696,7 +699,6 @@ public: void setInitialQueryStartTime(std::chrono::time_point initial_query_start_time); void setQuotaClientKey(const String & quota_key); void setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version); - void setReplicaInfo(bool collaborate_with_initiator, size_t all_replicas_count, size_t number_of_current_replica); void increaseDistributedDepth(); const OpenTelemetry::TracingContext & getClientTraceContext() const { return client_info.client_trace_context; } OpenTelemetry::TracingContext & getClientTraceContext() { return client_info.client_trace_context; } @@ -952,7 +954,7 @@ public: void makeSessionContext(); void makeGlobalContext(); - const Settings & getSettingsRef() const { return settings; } + const Settings & getSettingsRef() const { return *settings; } void setProgressCallback(ProgressCallback callback); /// Used in executeQuery() to pass it to the QueryPipeline. 
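Since Context.h now only forward-declares `struct Settings` and the heavy settings header is included from Context.cpp alone, any function body that reads a concrete setting can no longer stay inline in the header; that is why the HTTPContext getters in the next hunk shrink to declarations, with their definitions added to Context.cpp earlier in this diff. A single-file sketch of that rule, using made-up names instead of the real headers:

```cpp
#include <cstdint>
#include <iostream>

struct Settings;                      // all the "header" part sees

struct HttpContextLike
{
    const Settings * settings = nullptr;

    // Fine against an incomplete type: only a reference is formed.
    const Settings & getSettingsRef() const { return *settings; }

    // Reading a member needs the complete type, so only a declaration here.
    uint64_t getMaxUriSize() const;
};

// "Source file" part: the complete type is visible, definitions go here.
struct Settings
{
    uint64_t http_max_uri_size = 1048576;
};

uint64_t HttpContextLike::getMaxUriSize() const
{
    return getSettingsRef().http_max_uri_size;
}

int main()
{
    Settings s;
    HttpContextLike ctx;
    ctx.settings = &s;
    std::cout << ctx.getMaxUriSize() << '\n';  // prints 1048576
}
```

Forming a pointer or reference to the incomplete type is still allowed in the header, which is why `getSettingsRef()` can keep its inline body while the member-reading getters move out.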
@@ -1428,40 +1430,19 @@ struct HTTPContext : public IHTTPContext : context(Context::createCopy(context_)) {} - uint64_t getMaxHstsAge() const override - { - return context->getSettingsRef().hsts_max_age; - } + uint64_t getMaxHstsAge() const override; - uint64_t getMaxUriSize() const override - { - return context->getSettingsRef().http_max_uri_size; - } + uint64_t getMaxUriSize() const override; - uint64_t getMaxFields() const override - { - return context->getSettingsRef().http_max_fields; - } + uint64_t getMaxFields() const override; - uint64_t getMaxFieldNameSize() const override - { - return context->getSettingsRef().http_max_field_name_size; - } + uint64_t getMaxFieldNameSize() const override; - uint64_t getMaxFieldValueSize() const override - { - return context->getSettingsRef().http_max_field_value_size; - } + uint64_t getMaxFieldValueSize() const override; - Poco::Timespan getReceiveTimeout() const override - { - return context->getSettingsRef().http_receive_timeout; - } + Poco::Timespan getReceiveTimeout() const override; - Poco::Timespan getSendTimeout() const override - { - return context->getSettingsRef().http_send_timeout; - } + Poco::Timespan getSendTimeout() const override; ContextPtr context; }; diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 6c346836ed8..fa197d59c13 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h index 0b0460b26c8..515a35d8671 100644 --- a/src/Interpreters/DDLTask.h +++ b/src/Interpreters/DDLTask.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 5639eed552e..697fd0f406b 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index a70ff3c6c53..4bd1c47d5a0 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/ExpressionActionsSettings.h b/src/Interpreters/ExpressionActionsSettings.h index 326de983748..384a61a54a2 100644 --- a/src/Interpreters/ExpressionActionsSettings.h +++ b/src/Interpreters/ExpressionActionsSettings.h @@ -1,7 +1,7 @@ #pragma once +#include #include -#include #include diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index 1685c06d387..5f6d7e33927 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include "config.h" diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index 27c616ff40c..6d92fbd7a43 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -2,7 +2,6 @@ #include #include -#include #include #include #include diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 64b6eb5dce9..986134a6998 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -2,6 +2,7 @@ #include #include 
+#include #include #include #include diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 6970048269b..a241d1cd258 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index fa8ebd2c0f0..0c7cad4360d 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -125,7 +125,7 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s if (isCrossOrComma(kind)) { data->type = Type::CROSS; - sample_block_with_columns_to_add = right_sample_block; + sample_block_with_columns_to_add = materializeBlock(right_sample_block); } else if (table_join->getClauses().empty()) { diff --git a/src/Interpreters/IInterpreter.cpp b/src/Interpreters/IInterpreter.cpp index 148e73e43ce..a291199f24b 100644 --- a/src/Interpreters/IInterpreter.cpp +++ b/src/Interpreters/IInterpreter.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp index fed29b410db..d8f6df05ca4 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp @@ -1,4 +1,6 @@ #include + +#include #include #include #include @@ -15,6 +17,23 @@ namespace DB { +IInterpreterUnionOrSelectQuery::IInterpreterUnionOrSelectQuery(const DB::ASTPtr& query_ptr_, + const DB::ContextMutablePtr& context_, const DB::SelectQueryOptions& options_) + : query_ptr(query_ptr_) + , context(context_) + , options(options_) + , max_streams(context->getSettingsRef().max_threads) +{ + if (options.shard_num) + context->addSpecialScalar( + "_shard_num", + Block{{DataTypeUInt32().createColumnConst(1, *options.shard_num), std::make_shared(), "_shard_num"}}); + if (options.shard_count) + context->addSpecialScalar( + "_shard_count", + Block{{DataTypeUInt32().createColumnConst(1, *options.shard_count), std::make_shared(), "_shard_count"}}); +} + QueryPipelineBuilder IInterpreterUnionOrSelectQuery::buildQueryPipeline() { QueryPlan query_plan; diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.h b/src/Interpreters/IInterpreterUnionOrSelectQuery.h index e4425a73505..ac8e90b5f52 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.h +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.h @@ -17,21 +17,7 @@ public: { } - IInterpreterUnionOrSelectQuery(const ASTPtr & query_ptr_, const ContextMutablePtr & context_, const SelectQueryOptions & options_) - : query_ptr(query_ptr_) - , context(context_) - , options(options_) - , max_streams(context->getSettingsRef().max_threads) - { - if (options.shard_num) - context->addSpecialScalar( - "_shard_num", - Block{{DataTypeUInt32().createColumnConst(1, *options.shard_num), std::make_shared(), "_shard_num"}}); - if (options.shard_count) - context->addSpecialScalar( - "_shard_count", - Block{{DataTypeUInt32().createColumnConst(1, *options.shard_count), std::make_shared(), "_shard_count"}}); - } + IInterpreterUnionOrSelectQuery(const ASTPtr & query_ptr_, const ContextMutablePtr & context_, const SelectQueryOptions & options_); virtual void buildQueryPlan(QueryPlan & query_plan) = 0; QueryPipelineBuilder buildQueryPipeline(); diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index 36972aeb03d..24e594661e9 100644 --- 
a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -2,6 +2,8 @@ #include +#include + #include #include diff --git a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp index 3b3ef928b42..22ecb6a5db2 100644 --- a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp +++ b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index c70a3397f4e..8f9c6bc533e 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include #include @@ -24,7 +26,6 @@ #include #include #include -#include #include #include @@ -175,10 +176,9 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) else throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query"); - if (!getContext()->getSettings().allow_experimental_statistics && ( - command_ast->type == ASTAlterCommand::ADD_STATISTICS || - command_ast->type == ASTAlterCommand::DROP_STATISTICS || - command_ast->type == ASTAlterCommand::MATERIALIZE_STATISTICS)) + if (!getContext()->getSettingsRef().allow_experimental_statistics + && (command_ast->type == ASTAlterCommand::ADD_STATISTICS || command_ast->type == ASTAlterCommand::DROP_STATISTICS + || command_ast->type == ASTAlterCommand::MATERIALIZE_STATISTICS)) throw Exception(ErrorCodes::INCORRECT_QUERY, "Alter table with statistics is now disabled. Turn on allow_experimental_statistics"); } diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 4a84a7bf570..2b022a8cee0 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -2,6 +2,7 @@ #include #include +#include #include @@ -12,6 +13,8 @@ #include #include +#include + #include #include @@ -22,6 +25,7 @@ #include #include +#include #include #include #include @@ -91,7 +95,7 @@ Chunk getChunkFromCheckResult(const String & database, const String & table, con return Chunk(std::move(columns), 1); } -class TableCheckTask : public ChunkInfo +class TableCheckTask : public ChunkInfoCloneable { public: TableCheckTask(StorageID table_id, const std::variant & partition_or_part, ContextPtr context) @@ -110,6 +114,12 @@ public: context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID()); } + TableCheckTask(const TableCheckTask & other) + : table(other.table) + , check_data_tasks(other.check_data_tasks) + , is_finished(other.is_finished.load()) + {} + std::optional checkNext() const { if (isFinished()) @@ -121,8 +131,8 @@ public: std::this_thread::sleep_for(sleep_time); }); - IStorage::DataValidationTasksPtr check_data_tasks_ = check_data_tasks; - auto result = table->checkDataNext(check_data_tasks_); + IStorage::DataValidationTasksPtr tmp = check_data_tasks; + auto result = table->checkDataNext(tmp); is_finished = !result.has_value(); return result; } @@ -180,7 +190,7 @@ protected: /// source should return at least one row to start pipeline result.addColumn(ColumnUInt8::create(1, 1)); /// actual data stored in chunk info - result.setChunkInfo(std::move(current_check_task)); + result.getChunkInfos().add(std::move(current_check_task)); return result; } @@ -280,7 +290,7 @@ public: protected: void transform(Chunk & chunk) override { - auto table_check_task = 
std::dynamic_pointer_cast(chunk.getChunkInfo()); + auto table_check_task = chunk.getChunkInfos().get(); auto check_result = table_check_task->checkNext(); if (!check_result) { diff --git a/src/Interpreters/InterpreterCreateIndexQuery.cpp b/src/Interpreters/InterpreterCreateIndexQuery.cpp index a439cb672c8..1c6abe842d4 100644 --- a/src/Interpreters/InterpreterCreateIndexQuery.cpp +++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 0ee2bb6c0e9..454cedae15c 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -5,7 +5,7 @@ #include #include -#include "Common/Exception.h" +#include #include #include #include @@ -14,9 +14,8 @@ #include #include #include +#include #include -#include -#include #include #include @@ -34,10 +33,11 @@ #include #include +#include #include #include -#include #include +#include #include #include @@ -81,13 +81,13 @@ #include #include -#include #include #include #include #include + namespace CurrentMetrics { extern const Metric AttachedTable; @@ -146,27 +146,27 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) } auto db_num_limit = getContext()->getGlobalContext()->getServerSettings().max_database_num_to_throw; - if (db_num_limit > 0) + if (db_num_limit > 0 && !internal) { size_t db_count = DatabaseCatalog::instance().getDatabases().size(); - std::vector system_databases = { + std::initializer_list system_databases = + { DatabaseCatalog::TEMPORARY_DATABASE, DatabaseCatalog::SYSTEM_DATABASE, DatabaseCatalog::INFORMATION_SCHEMA, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE, - DatabaseCatalog::DEFAULT_DATABASE }; for (const auto & system_database : system_databases) { - if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(system_database)) - db_count--; + if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(std::string(system_database))) + --db_count; } if (db_count >= db_num_limit) throw Exception(ErrorCodes::TOO_MANY_DATABASES, - "Too many databases in the Clickhouse. " - "The limit (setting 'max_database_num_to_throw') is set to {}, current number of databases is {}", + "Too many databases. " + "The limit (server configuration parameter `max_database_num_to_throw`) is set to {}, the current number of databases is {}", db_num_limit, db_count); } @@ -949,7 +949,7 @@ namespace throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated, Shared or KeeperMap table engines"); } - void setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine) + void setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine) { if (engine == DefaultTableEngine::None) throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); @@ -969,9 +969,6 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (create.is_dictionary || create.is_ordinary_view || create.is_live_view || create.is_window_view) return; - if (create.is_materialized_view && create.to_table_id) - return; - if (create.temporary) { /// Some part of storage definition is specified, but ENGINE is not: just set the one from default_temporary_table_engine setting. 
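The InterpreterCheckQuery.cpp changes above follow the chunk-info reshuffle used throughout this PR: instead of `setChunkInfo()` storing one opaque pointer, producers now `add()` a copyable info object to `chunk.getChunkInfos()` (hence `TableCheckTask` deriving from `ChunkInfoCloneable` and gaining a copy constructor), and consumers fetch it back by type with `get()`. A rough, self-contained model of that idea, with illustrative class names rather than the real ClickHouse types:

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <typeindex>
#include <typeinfo>
#include <unordered_map>

struct ChunkInfoBase { virtual ~ChunkInfoBase() = default; };

// Plays the role of the container behind getChunkInfos(): a type-keyed bag.
class ChunkInfos
{
public:
    template <typename T>
    void add(std::shared_ptr<T> info) { infos[typeid(T)] = std::move(info); }

    template <typename T>
    std::shared_ptr<T> get() const
    {
        auto it = infos.find(typeid(T));
        return it == infos.end() ? nullptr : std::static_pointer_cast<T>(it->second);
    }

private:
    std::unordered_map<std::type_index, std::shared_ptr<ChunkInfoBase>> infos;
};

// Stands in for TableCheckTask: the real payload travels as chunk info,
// while the chunk itself only carries a dummy column.
struct CheckTaskInfo : ChunkInfoBase { std::string table = "system.one"; };

int main()
{
    ChunkInfos infos;                              // lives inside a Chunk
    infos.add(std::make_shared<CheckTaskInfo>());  // producer (source) side
    if (auto task = infos.get<CheckTaskInfo>())    // consumer transform side
        std::cout << "checking " << task->table << '\n';
}
```

The real container presumably also handles cloning and multiple infos per chunk; the sketch only shows the add/get-by-type flow that lets the transform replace its `dynamic_pointer_cast` of a single chunk info.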
@@ -986,22 +983,44 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const } if (!create.storage->engine) - { setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_temporary_table_engine.value); - } checkTemporaryTableEngineName(create.storage->engine->name); return; } + if (create.is_materialized_view) + { + /// A materialized view with an external target doesn't need a table engine. + if (create.is_materialized_view_with_external_target()) + return; + + if (auto to_engine = create.getTargetInnerEngine(ViewTarget::To)) + { + /// This materialized view already has a storage definition. + if (!to_engine->engine) + { + /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. + setDefaultTableEngine(*to_engine, getContext()->getSettingsRef().default_table_engine.value); + } + return; + } + } + if (create.storage) { - /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. + /// This table already has a storage definition. if (!create.storage->engine) + { + /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); + } return; } + /// We'll try to extract a storage definition from clause `AS`: + /// CREATE TABLE table_name AS other_table_name + std::shared_ptr storage_def; if (!create.as_table.empty()) { /// NOTE Getting the structure from the table specified in the AS is done not atomically with the creation of the table. @@ -1017,12 +1036,14 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (as_create.is_ordinary_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a View", qualified_name); - if (as_create.is_materialized_view && as_create.to_table_id) + if (as_create.is_materialized_view_with_external_target()) + { throw Exception( ErrorCodes::INCORRECT_QUERY, - "Cannot CREATE a table AS {}, it is a Materialized View without storage. Use \"AS `{}`\" instead", + "Cannot CREATE a table AS {}, it is a Materialized View without storage. Use \"AS {}\" instead", qualified_name, - as_create.to_table_id.getQualifiedName()); + as_create.getTargetTableID(ViewTarget::To).getFullTableName()); + } if (as_create.is_live_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Live View", qualified_name); @@ -1033,18 +1054,37 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (as_create.is_dictionary) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Dictionary", qualified_name); - if (as_create.storage) - create.set(create.storage, as_create.storage->ptr()); + if (as_create.is_materialized_view) + { + storage_def = as_create.getTargetInnerEngine(ViewTarget::To); + } else if (as_create.as_table_function) + { create.set(create.as_table_function, as_create.as_table_function->ptr()); + return; + } + else if (as_create.storage) + { + storage_def = typeid_cast>(as_create.storage->ptr()); + } else + { throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set engine, it's a bug."); - - return; + } } - create.set(create.storage, std::make_shared()); - setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); + if (!storage_def) + { + /// Set ENGINE by default. 
+ storage_def = std::make_shared(); + setDefaultTableEngine(*storage_def, getContext()->getSettingsRef().default_table_engine.value); + } + + /// Use the found table engine to modify the create query. + if (create.is_materialized_view) + create.setTargetInnerEngine(ViewTarget::To, storage_def); + else + create.set(create.storage, storage_def); } void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const DatabasePtr & database) const @@ -1086,11 +1126,11 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data kind_upper, create.table); } - create.generateRandomUUID(); + create.generateRandomUUIDs(); } else { - bool has_uuid = create.uuid != UUIDHelpers::Nil || create.to_inner_uuid != UUIDHelpers::Nil; + bool has_uuid = (create.uuid != UUIDHelpers::Nil) || create.hasInnerUUIDs(); if (has_uuid && !is_on_cluster && !internal) { /// We don't show the following error message either @@ -1105,8 +1145,7 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data /// The database doesn't support UUID so we'll ignore it. The UUID could be set here because of either /// a) the initiator of `ON CLUSTER` query generated it to ensure the same UUIDs are used on different hosts; or /// b) `RESTORE from backup` query generated it to ensure the same UUIDs are used on different hosts. - create.uuid = UUIDHelpers::Nil; - create.to_inner_uuid = UUIDHelpers::Nil; + create.resetUUIDs(); } } @@ -1130,6 +1169,14 @@ void checkTableCanBeAddedWithNoCyclicDependencies(const ASTCreateQuery & create, DatabaseCatalog::instance().checkTableCanBeAddedWithNoCyclicDependencies(qualified_name, ref_dependencies, loading_dependencies); } +bool isReplicated(const ASTStorage & storage) +{ + if (!storage.engine) + return false; + const auto & storage_name = storage.engine->name; + return storage_name.starts_with("Replicated") || storage_name.starts_with("Shared"); +} + } BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) @@ -1246,8 +1293,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!create.temporary && !create.database) create.setDatabase(current_database); - if (create.to_table_id && create.to_table_id.database_name.empty()) - create.to_table_id.database_name = current_database; + + if (create.targets) + create.targets->setCurrentDatabase(current_database); if (create.select && create.isView()) { @@ -1281,12 +1329,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) TableProperties properties = getTablePropertiesAndNormalizeCreateQuery(create, mode); /// Check type compatible for materialized dest table and select columns - if (create.select && create.is_materialized_view && create.to_table_id && mode <= LoadingStrictnessLevel::CREATE) + if (create.is_materialized_view_with_external_target() && create.select && mode <= LoadingStrictnessLevel::CREATE) { - if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable( - {create.to_table_id.database_name, create.to_table_id.table_name, create.to_table_id.uuid}, - getContext() - )) + if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable(create.getTargetTableID(ViewTarget::To), getContext())) { Block input_block; @@ -1332,11 +1377,17 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!allow_heavy_create && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; - if (create.storage && create.storage->engine) + + if (create.storage 
&& isReplicated(*create.storage)) + is_storage_replicated = true; + + if (create.targets) { - const auto & storage_name = create.storage->engine->name; - if (storage_name.starts_with("Replicated") || storage_name.starts_with("Shared")) - is_storage_replicated = true; + for (const auto & inner_table_engine : create.targets->getInnerEngines()) + { + if (isReplicated(*inner_table_engine)) + is_storage_replicated = true; + } } const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated; @@ -1600,13 +1651,13 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, } UInt64 table_num_limit = getContext()->getGlobalContext()->getServerSettings().max_table_num_to_throw; - if (table_num_limit > 0 && create.getDatabase() != DatabaseCatalog::SYSTEM_DATABASE) + if (table_num_limit > 0 && !internal) { UInt64 table_count = CurrentMetrics::get(CurrentMetrics::AttachedTable); if (table_count >= table_num_limit) throw Exception(ErrorCodes::TOO_MANY_TABLES, - "Too many tables in the Clickhouse. " - "The limit (setting 'max_table_num_to_throw') is set to {}, current number of tables is {}", + "Too many tables. " + "The limit (server configuration parameter `max_table_num_to_throw`) is set to {}, the current number of tables is {}", table_num_limit, table_count); } @@ -1776,8 +1827,13 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) else insert->select = create.select->clone(); - return InterpreterInsertQuery(insert, getContext(), - getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); + return InterpreterInsertQuery( + insert, + getContext(), + getContext()->getSettingsRef().insert_allow_materialized_columns, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false).execute(); } return {}; @@ -1790,7 +1846,7 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, Cont /// For CREATE query generate UUID on initiator, so it will be the same on all hosts. /// It will be ignored if database does not support UUIDs. - create.generateRandomUUID(); + create.generateRandomUUIDs(); /// For cross-replication cluster we cannot use UUID in replica path. 
String cluster_name_expanded = local_context->getMacros()->expand(cluster_name); @@ -1912,8 +1968,15 @@ AccessRightsElements InterpreterCreateQuery::getRequiredAccess() const } } - if (create.to_table_id) - required_access.emplace_back(AccessType::SELECT | AccessType::INSERT, create.to_table_id.database_name, create.to_table_id.table_name); + if (create.targets) + { + for (const auto & target : create.targets->targets) + { + const auto & target_id = target.table_id; + if (target_id) + required_access.emplace_back(AccessType::SELECT | AccessType::INSERT, target_id.database_name, target_id.table_name); + } + } if (create.storage && create.storage->engine) required_access.emplace_back(AccessType::TABLE_ENGINE, create.storage->engine->name); diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 70ef29e6b07..3982ea2cabc 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 39d5d9e9cef..84fe0a9f4ae 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 87b0eb89302..39fc85a5e23 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterDropIndexQuery.cpp b/src/Interpreters/InterpreterDropIndexQuery.cpp index f052aa201f1..8777532e4d0 100644 --- a/src/Interpreters/InterpreterDropIndexQuery.cpp +++ b/src/Interpreters/InterpreterDropIndexQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index ba788a2e1f3..b68b3ddcd48 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "config.h" diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 7c7b4b3f95a..bedd9cb4a80 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -29,6 +29,7 @@ #include #include +#include #include #include @@ -69,7 +70,7 @@ namespace static void visit(ASTSelectQuery & select, ASTPtr & node, Data & data) { /// we need to read statistic when `allow_statistics_optimize` is enabled. 
- bool only_analyze = !data.getContext()->getSettings().allow_statistics_optimize; + bool only_analyze = !data.getContext()->getSettingsRef().allow_statistics_optimize; InterpreterSelectQuery interpreter( node, data.getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns).analyze(only_analyze).modify()); @@ -534,7 +535,13 @@ QueryPipeline InterpreterExplainQuery::executeImpl() } else if (dynamic_cast(ast.getExplainedQuery().get())) { - InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext()); + InterpreterInsertQuery insert( + ast.getExplainedQuery(), + getContext(), + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto io = insert.execute(); printPipeline(io.pipeline.getProcessors(), buf); } diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 387d056ffe7..12b3b510098 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -60,6 +60,7 @@ #include #include #include +#include namespace ProfileEvents diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index f396db70d21..3906cef1995 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -35,9 +38,11 @@ #include #include #include +#include #include #include #include +#include "base/defines.h" namespace ProfileEvents @@ -394,28 +399,349 @@ Chain InterpreterInsertQuery::buildPreSinkChain( return out; } +std::pair, std::vector> InterpreterInsertQuery::buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) +{ + chassert(presink_streams > 0); + chassert(sink_streams > 0); + + ThreadGroupPtr running_group; + if (current_thread) + running_group = current_thread->getThreadGroup(); + if (!running_group) + running_group = std::make_shared(getContext()); + + std::vector sink_chains; + std::vector presink_chains; + + for (size_t i = 0; i < sink_streams; ++i) + { + auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, + running_group, /* elapsed_counter_ms= */ nullptr); + + sink_chains.emplace_back(std::move(out)); + } + + for (size_t i = 0; i < presink_streams; ++i) + { + auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); + presink_chains.emplace_back(std::move(out)); + } + + return {std::move(presink_chains), std::move(sink_chains)}; +} + + +QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table) +{ + const Settings & settings = getContext()->getSettingsRef(); + + auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); + + bool is_trivial_insert_select = false; + + if (settings.optimize_trivial_insert_select) + { + const auto & select_query = query.select->as(); + const auto & selects = select_query.list_of_selects->children; + const auto & union_modes = select_query.list_of_modes; + + /// ASTSelectWithUnionQuery is not normalized now, so it may pass some 
queries which can be Trivial select queries + const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; + + is_trivial_insert_select = + std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) + && std::all_of(selects.begin(), selects.end(), isTrivialSelect); + } + + ContextPtr select_context = getContext(); + + if (is_trivial_insert_select) + { + /** When doing trivial INSERT INTO ... SELECT ... FROM table, + * don't need to process SELECT with more than max_insert_threads + * and it's reasonable to set block size for SELECT to the desired block size for INSERT + * to avoid unnecessary squashing. + */ + + Settings new_settings = select_context->getSettings(); + + new_settings.max_threads = std::max(1, settings.max_insert_threads); + + if (table->prefersLargeBlocks()) + { + if (settings.min_insert_block_size_rows) + new_settings.max_block_size = settings.min_insert_block_size_rows; + if (settings.min_insert_block_size_bytes) + new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; + } + + auto context_for_trivial_select = Context::createCopy(context); + context_for_trivial_select->setSettings(new_settings); + context_for_trivial_select->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); + + select_context = context_for_trivial_select; + } + + QueryPipelineBuilder pipeline; + + { + auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, select_context, select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + + pipeline.dropTotalsAndExtremes(); + + /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. + if (getContext()->getSettingsRef().insert_null_as_default) + { + const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); + const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); + const auto & output_columns = metadata_snapshot->getColumns(); + + if (input_columns.size() == query_columns.size()) + { + for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) + { + /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with + /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. 
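The comment above describes the insert_null_as_default adjustment; as a rough standalone illustration (plain C++ with hypothetical field names, not the actual ClickHouse types), the per-column decision made in the hunk that follows amounts to this predicate:

#include <iostream>

// Hypothetical, simplified view of one column in the INSERT sample block.
struct ColumnTraits
{
    bool input_is_nullable = false;           // type produced by the SELECT part
    bool target_is_nullable = false;          // type currently in the query sample block
    bool target_is_variant_or_dynamic = false;
    bool exists_in_destination = false;       // destination table has a column with this name
};

// Only widen plain, non-Variant/non-Dynamic target columns that actually exist in the
// output table; NULLs coming from the Nullable input are then replaced with defaults later.
bool shouldWidenToNullable(const ColumnTraits & c)
{
    return c.input_is_nullable
        && !c.target_is_nullable
        && !c.target_is_variant_or_dynamic
        && c.exists_in_destination;
}

int main()
{
    std::cout << shouldWidenToNullable({true, false, false, true}) << '\n';  // 1: widen to Nullable
    std::cout << shouldWidenToNullable({true, true, false, true}) << '\n';   // 0: already Nullable
}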
+ if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) + && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) + && !isVariant(query_columns[col_idx].type) + && !isDynamic(query_columns[col_idx].type) + && output_columns.has(query_columns[col_idx].name)) + { + query_sample_block.setColumn( + col_idx, + ColumnWithTypeAndName( + makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), + makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), + query_columns[col_idx].name)); + } + } + } + } + + auto actions_dag = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + query_sample_block.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header, actions); + }); + + /// We need to convert Sparse columns to full, because it's destination storage + /// may not support it or may have different settings for applying Sparse serialization. + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + auto context_ptr = getContext(); + auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + + return counting; + }); + + size_t num_select_threads = pipeline.getNumThreads(); + + pipeline.resize(1); + + if (shouldAddSquashingFroStorage(table)) + { + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size, + table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL); + }); + } + + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + + if (!settings.insert_deduplication_token.value.empty()) + { + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(settings.insert_deduplication_token.value, in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + } + + /// Number of streams works like this: + /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever + /// InterpreterSelectQuery ends up with. + /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. + /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. + /// * If the table supports parallel inserts, use max_insert_threads for writing to IStorage. + /// Otherwise ResizeProcessor them down to 1 stream. + + size_t presink_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); + if (settings.max_insert_threads.changed) + presink_streams_size = std::max(1, settings.max_insert_threads); + + size_t sink_streams_size = table->supportsParallelInsert() ? 
std::max(1, settings.max_insert_threads) : 1; + + size_t views_involved = table->isView() || !DatabaseCatalog::instance().getDependentViews(table->getStorageID()).empty(); + if (!settings.parallel_view_processing && views_involved) + { + sink_streams_size = 1; + } + + auto [presink_chains, sink_chains] = buildPreAndSinkChains( + presink_streams_size, sink_streams_size, + table, metadata_snapshot, query_sample_block); + + pipeline.resize(presink_chains.size()); + + if (shouldAddSquashingFroStorage(table)) + { + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size, + table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL); + }); + } + + for (auto & chain : presink_chains) + pipeline.addResources(chain.detachResources()); + pipeline.addChains(std::move(presink_chains)); + + pipeline.resize(sink_streams_size); + + for (auto & chain : sink_chains) + pipeline.addResources(chain.detachResources()); + pipeline.addChains(std::move(sink_chains)); + + if (!settings.parallel_view_processing && views_involved) + { + /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. + if (pipeline.getNumThreads() > num_select_threads) + pipeline.setMaxThreads(num_select_threads); + } + + pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header); + }); + + return QueryPipelineBuilder::getPipeline(std::move(pipeline)); +} + + +QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query, StoragePtr table) +{ + const Settings & settings = getContext()->getSettingsRef(); + + auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); + + Chain chain; + + { + auto [presink_chains, sink_chains] = buildPreAndSinkChains( + /* presink_streams */1, /* sink_streams */1, + table, metadata_snapshot, query_sample_block); + + chain = std::move(presink_chains.front()); + chain.appendChain(std::move(sink_chains.front())); + } + + if (!settings.insert_deduplication_token.value.empty()) + { + chain.addSource(std::make_shared(chain.getInputHeader())); + chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); + } + + chain.addSource(std::make_shared(chain.getInputHeader())); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + auto squashing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(squashing)); + + auto balancing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(balancing)); + } + + auto context_ptr = getContext(); + auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + chain.addSource(std::move(counting)); + + QueryPipeline pipeline = QueryPipeline(std::move(chain)); + + pipeline.setNumThreads(std::min(pipeline.getNumThreads(), settings.max_threads)); + pipeline.setConcurrencyControl(settings.use_concurrency_control); + + if (query.hasInlinedData() && !async_insert) + { + /// can execute without additional data + auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); + for (auto && buffer : owned_buffers) + format->addBuffer(std::move(buffer)); + + auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); + pipeline.complete(std::move(pipe)); + } + + return pipeline; +} + + BlockIO InterpreterInsertQuery::execute() { const Settings & settings = getContext()->getSettingsRef(); auto & query = query_ptr->as(); - QueryPipelineBuilder pipeline; - std::optional distributed_pipeline; - QueryPlanResourceHolder resources; StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); - StoragePtr inner_table; - if (const auto * mv = dynamic_cast(table.get())) - inner_table = mv->getTargetTable(); - if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); - auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); /// For table functions we check access while executing @@ -423,320 +749,45 @@ BlockIO InterpreterInsertQuery::execute() if (!query.table_function) getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); - if (query.select && settings.parallel_distributed_insert_select) - // Distributed INSERT SELECT - distributed_pipeline = table->distributedWrite(query, getContext()); - - std::vector presink_chains; - std::vector sink_chains; - if (!distributed_pipeline) + if (!allow_materialized) { - /// Number of streams works like this: - /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever - /// InterpreterSelectQuery ends up with. - /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. - /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. - /// * If the table supports parallel inserts, use the same streams for writing to IStorage. - /// Otherwise ResizeProcessor them down to 1 stream. - /// * If it's not an INSERT SELECT, forget all that and use one stream. 
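For comparison with the removed logic quoted above, the new buildInsertSelectPipeline sizes its stages roughly as follows. This is a minimal standalone sketch under assumed setting names (not the real Settings class), only meant to illustrate the rule:

#include <algorithm>
#include <cstddef>
#include <iostream>

// Assumed stand-in for the handful of settings the sizing logic looks at.
struct InsertSettings
{
    size_t max_insert_threads = 0;
    bool max_insert_threads_changed = false;   // the user set the value explicitly
    bool parallel_view_processing = false;
};

// Pre-sink streams do materialization/squashing; keep the SELECT's parallelism
// unless max_insert_threads was pinned explicitly by the user.
size_t pickPresinkStreams(const InsertSettings & s, size_t select_streams)
{
    size_t presink = std::max(s.max_insert_threads, select_streams);
    if (s.max_insert_threads_changed)
        presink = std::max<size_t>(1, s.max_insert_threads);
    return presink;
}

// Sink streams write to the storage: parallel only if the table supports it, and
// forced back to a single stream when views are involved and parallel_view_processing is off.
size_t pickSinkStreams(const InsertSettings & s, bool supports_parallel_insert, bool views_involved)
{
    size_t sinks = supports_parallel_insert ? std::max<size_t>(1, s.max_insert_threads) : 1;
    if (!s.parallel_view_processing && views_involved)
        sinks = 1;
    return sinks;
}

int main()
{
    InsertSettings s;
    s.max_insert_threads = 4;
    s.max_insert_threads_changed = true;
    std::cout << pickPresinkStreams(s, 8) << ' ' << pickSinkStreams(s, true, false) << '\n';  // prints "4 4"
}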
- size_t pre_streams_size = 1; - size_t sink_streams_size = 1; - - if (query.select) - { - bool is_trivial_insert_select = false; - - if (settings.optimize_trivial_insert_select) - { - const auto & select_query = query.select->as(); - const auto & selects = select_query.list_of_selects->children; - const auto & union_modes = select_query.list_of_modes; - - /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries - const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; - - is_trivial_insert_select = - std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) - && std::all_of(selects.begin(), selects.end(), isTrivialSelect); - } - - if (is_trivial_insert_select) - { - /** When doing trivial INSERT INTO ... SELECT ... FROM table, - * don't need to process SELECT with more than max_insert_threads - * and it's reasonable to set block size for SELECT to the desired block size for INSERT - * to avoid unnecessary squashing. - */ - - Settings new_settings = getContext()->getSettings(); - - new_settings.max_threads = std::max(1, settings.max_insert_threads); - - if (table->prefersLargeBlocks()) - { - if (settings.min_insert_block_size_rows) - new_settings.max_block_size = settings.min_insert_block_size_rows; - if (settings.min_insert_block_size_bytes) - new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; - } - - auto new_context = Context::createCopy(context); - new_context->setSettings(new_settings); - new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); - - auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - else - { - /// Passing 1 as subquery_depth will disable limiting size of intermediate result. - auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - - pipeline.dropTotalsAndExtremes(); - - if (settings.max_insert_threads > 1) - { - auto table_id = table->getStorageID(); - auto views = DatabaseCatalog::instance().getDependentViews(table_id); - - /// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them. - /// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts. - const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert(); - pre_streams_size = resize_to_max_insert_threads ? 
settings.max_insert_threads - : std::min(settings.max_insert_threads, pipeline.getNumStreams()); - - /// Deduplication when passing insert_deduplication_token breaks if using more than one thread - if (!settings.insert_deduplication_token.toString().empty()) - { - LOG_DEBUG( - getLogger("InsertQuery"), - "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); - pre_streams_size = 1; - } - - if (table->supportsParallelInsert()) - sink_streams_size = pre_streams_size; - } - - pipeline.resize(pre_streams_size); - - /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. - if (getContext()->getSettingsRef().insert_null_as_default) - { - const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); - const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); - const auto & output_columns = metadata_snapshot->getColumns(); - - if (input_columns.size() == query_columns.size()) - { - for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) - { - /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with - /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. - if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) - && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) - && !isVariant(query_columns[col_idx].type) - && !isDynamic(query_columns[col_idx].type) - && output_columns.has(query_columns[col_idx].name)) - query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); - } - } - } - } - - ThreadGroupPtr running_group; - if (current_thread) - running_group = current_thread->getThreadGroup(); - if (!running_group) - running_group = std::make_shared(getContext()); - for (size_t i = 0; i < sink_streams_size; ++i) - { - auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, - running_group, /* elapsed_counter_ms= */ nullptr); - sink_chains.emplace_back(std::move(out)); - } - for (size_t i = 0; i < pre_streams_size; ++i) - { - auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); - presink_chains.emplace_back(std::move(out)); - } + for (const auto & column : metadata_snapshot->getColumns()) + if (column.default_desc.kind == ColumnDefaultKind::Materialized && query_sample_block.has(column.name)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); } BlockIO res; - /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? 
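The restructured execute() shown below replaces this branching with two helpers; roughly, the new control flow is the following. A self-contained sketch with placeholder types, not the actual interpreter API:

#include <functional>
#include <iostream>
#include <optional>
#include <string>

using Pipeline = std::string;  // stand-in for QueryPipeline, for illustration only

// INSERT ... SELECT first tries a distributed write when parallel_distributed_insert_select
// is enabled and the storage provides one, otherwise builds the local insert-select pipeline;
// a plain INSERT takes the simple pipeline.
Pipeline dispatchInsert(
    bool has_select,
    bool parallel_distributed_insert_select,
    const std::function<std::optional<Pipeline>()> & try_distributed_write,
    const std::function<Pipeline()> & build_insert_select,
    const std::function<Pipeline()> & build_plain_insert)
{
    if (has_select)
    {
        if (parallel_distributed_insert_select)
        {
            if (auto distributed = try_distributed_write())
                return *distributed;
        }
        return build_insert_select();
    }
    return build_plain_insert();
}

int main()
{
    auto no_distributed = [] { return std::optional<Pipeline>{}; };
    std::cout << dispatchInsert(true, true, no_distributed,
                                [] { return Pipeline{"insert-select"}; },
                                [] { return Pipeline{"plain-insert"}; }) << '\n';  // insert-select
}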
- if (distributed_pipeline) + if (query.select) { - res.pipeline = std::move(*distributed_pipeline); - } - else if (query.select) - { - const auto & header = presink_chains.at(0).getInputHeader(); - auto actions_dag = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), - header.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + if (settings.parallel_distributed_insert_select) { - return std::make_shared(in_header, actions); - }); - - /// We need to convert Sparse columns to full, because it's destination storage - /// may not support it or may have different settings for applying Sparse serialization. - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - auto context_ptr = getContext(); - auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - - return counting; - }); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - size_t threads = presink_chains.size(); - - pipeline.resize(1); - - pipeline.addTransform(std::make_shared( - header, - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); - - pipeline.resize(threads); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + auto distributed = table->distributedWrite(query, getContext()); + if (distributed) { - return std::make_shared( - in_header, - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - }); + res.pipeline = std::move(*distributed); + } + else + { + res.pipeline = buildInsertSelectPipeline(query, table); + } } - - size_t num_select_threads = pipeline.getNumThreads(); - - for (auto & chain : presink_chains) - resources = chain.detachResources(); - for (auto & chain : sink_chains) - resources = chain.detachResources(); - - pipeline.addChains(std::move(presink_chains)); - pipeline.resize(sink_chains.size()); - pipeline.addChains(std::move(sink_chains)); - - if (!settings.parallel_view_processing) + else { - /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. - if (pipeline.getNumThreads() > num_select_threads) - pipeline.setMaxThreads(num_select_threads); + res.pipeline = buildInsertSelectPipeline(query, table); } - else if (pipeline.getNumThreads() < settings.max_threads) - { - /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, - /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. - /// - /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. 
- pipeline.setMaxThreads(settings.max_threads); - } - - pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr - { - return std::make_shared(cur_header); - }); - - if (!allow_materialized) - { - for (const auto & column : metadata_snapshot->getColumns()) - if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); - } - - res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); } else { - auto & chain = presink_chains.at(0); - chain.appendChain(std::move(sink_chains.at(0))); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - auto squashing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(squashing)); - - auto balancing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(balancing)); - } - - auto context_ptr = getContext(); - auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - chain.addSource(std::move(counting)); - - res.pipeline = QueryPipeline(std::move(presink_chains[0])); - res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); - res.pipeline.setConcurrencyControl(settings.use_concurrency_control); - - if (query.hasInlinedData() && !async_insert) - { - /// can execute without additional data - auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); - for (auto && buffer : owned_buffers) - format->addBuffer(std::move(buffer)); - - auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); - res.pipeline.complete(std::move(pipe)); - } + res.pipeline = buildInsertPipeline(query, table); } - res.pipeline.addResources(std::move(resources)); - res.pipeline.addStorageHolder(table); - if (inner_table) - res.pipeline.addStorageHolder(inner_table); + + if (const auto * mv = dynamic_cast(table.get())) + res.pipeline.addStorageHolder(mv->getTargetTable()); + + LOG_TEST(getLogger("InterpreterInsertQuery"), "Pipeline could use up to {} thread", res.pipeline.getNumThreads()); return res; } @@ -757,17 +808,27 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, Cont } } + void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const { extendQueryLogElemImpl(elem, context_); } + void registerInterpreterInsertQuery(InterpreterFactory & factory) { auto create_fn = [] (const InterpreterFactory::Arguments & args) { - return std::make_unique(args.query, args.context, args.allow_materialized); + return std::make_unique( + args.query, + args.context, + args.allow_materialized, + /* no_squash */false, + /* no_destination */false, + /* async_insert */false); }; factory.registerInterpreter("InterpreterInsertQuery", create_fn); } + + } diff --git 
a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index bf73fb2a319..894c7c42144 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -23,10 +23,10 @@ public: InterpreterInsertQuery( const ASTPtr & query_ptr_, ContextPtr context_, - bool allow_materialized_ = false, - bool no_squash_ = false, - bool no_destination_ = false, - bool async_insert_ = false); + bool allow_materialized_, + bool no_squash_, + bool no_destination, + bool async_insert_); /** Prepare a request for execution. Return block streams * - the stream into which you can write data to execute the query, if INSERT; @@ -73,12 +73,17 @@ private: ASTPtr query_ptr; const bool allow_materialized; - const bool no_squash; - const bool no_destination; + bool no_squash = false; + bool no_destination = false; const bool async_insert; std::vector> owned_buffers; + std::pair, std::vector> buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); + + QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table); + QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table); + Chain buildSink( const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 6d6b1085ffb..7eb487ba7b3 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index 32c475d138f..c6e52590f89 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include diff --git a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp index 6eac2db20c9..b0da4caa3ac 100644 --- a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp +++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index f13710e9ad5..7bee497f6da 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -84,6 +84,8 @@ #include #include #include +#include +#include #include #include #include @@ -1724,7 +1726,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

getCurrentDataStream().header, join_clause.key_names_right); - if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys) + if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys) { auto * left_set = add_create_set(query_plan, join_clause.key_names_left, JoinTableSide::Left); auto * right_set = add_create_set(*joined_plan, join_clause.key_names_right, JoinTableSide::Right); diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index cc1d7dd6531..e953a07b41d 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterShowColumnsQuery.cpp b/src/Interpreters/InterpreterShowColumnsQuery.cpp index f32ebceaa63..d8fff4e6026 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.cpp +++ b/src/Interpreters/InterpreterShowColumnsQuery.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index 0fca7b64d5a..16add79d226 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -94,7 +94,8 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() { auto & create = create_query->as(); create.uuid = UUIDHelpers::Nil; - create.to_inner_uuid = UUIDHelpers::Nil; + if (create.targets) + create.targets->resetInnerUUIDs(); } MutableColumnPtr column = ColumnString::create(); diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index e8005ead91e..31f0404e123 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -33,12 +33,33 @@ String InterpreterShowIndexesQuery::getRewrittenQuery() String rewritten_query = fmt::format(R"( SELECT * FROM ( - (SELECT + (WITH + t1 AS ( + SELECT + name, + arrayJoin(splitByString(', ', primary_key)) AS pk_col + FROM + system.tables + WHERE + database = '{0}' + AND name = '{1}' + ), + t2 AS ( + SELECT + name, + pk_col, + row_number() OVER (ORDER BY 1) AS row_num + FROM + t1 + ) + SELECT name AS table, 1 AS non_unique, 'PRIMARY' AS key_name, - row_number() over (order by column_name) AS seq_in_index, - arrayJoin(splitByString(', ', primary_key)) AS column_name, + -- row_number() over (order by database) AS seq_in_index, + row_num AS seq_in_index, + -- arrayJoin(splitByString(', ', primary_key)) AS column_name, + pk_col, 'A' AS collation, 0 AS cardinality, NULL AS sub_part, @@ -49,10 +70,9 @@ FROM ( '' AS index_comment, 'YES' AS visible, '' AS expression - FROM system.tables - WHERE - database = '{0}' - AND name = '{1}') + FROM + t2 + ) UNION ALL ( SELECT table AS table, @@ -70,12 +90,13 @@ FROM ( '' AS index_comment, 'YES' AS visible, expr AS expression - FROM system.data_skipping_indices + FROM + system.data_skipping_indices WHERE database = '{0}' AND table = '{1}')) {2} -ORDER BY index_type, expression, column_name, seq_in_index;)", database, table, where_expression); +ORDER BY index_type, expression, seq_in_index;)", database, table, where_expression); /// Sorting is strictly speaking not necessary but 1. it is convenient for users, 2. 
SQL currently does not allow to /// sort the output of SHOW INDEXES otherwise (SELECT * FROM (SHOW INDEXES ...) ORDER BY ...) is rejected) and 3. some diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index 51038aaca46..78580f2ec02 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -121,9 +121,18 @@ String InterpreterShowTablesQuery::getRewrittenQuery() if (query.merges) { WriteBufferFromOwnString rewritten_query; - rewritten_query << "SELECT table, database, round((elapsed * (1 / merges.progress)) - merges.elapsed, 2) AS estimate_complete, round(elapsed,2) elapsed, " - "round(progress*100, 2) AS progress, is_mutation, formatReadableSize(total_size_bytes_compressed) AS size_compressed, " - "formatReadableSize(memory_usage) AS memory_usage FROM system.merges"; + rewritten_query << R"( + SELECT + table, + database, + merges.progress > 0 ? round(merges.elapsed * (1 - merges.progress) / merges.progress, 2) : NULL AS estimate_complete, + round(elapsed, 2) AS elapsed, + round(progress * 100, 2) AS progress, + is_mutation, + formatReadableSize(total_size_bytes_compressed) AS size_compressed, + formatReadableSize(memory_usage) AS memory_usage + FROM system.merges + )"; if (!query.like.empty()) { diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index d3526941b33..c284acfa308 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterTransactionControlQuery.cpp b/src/Interpreters/InterpreterTransactionControlQuery.cpp index 13872fbe3f5..e92d6f9c5e7 100644 --- a/src/Interpreters/InterpreterTransactionControlQuery.cpp +++ b/src/Interpreters/InterpreterTransactionControlQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Interpreters/JoinUtils.cpp b/src/Interpreters/JoinUtils.cpp index 1788c9aca48..180a45d4295 100644 --- a/src/Interpreters/JoinUtils.cpp +++ b/src/Interpreters/JoinUtils.cpp @@ -554,7 +554,7 @@ static Blocks scatterBlockByHashImpl(const Strings & key_columns_names, const Bl for (const auto & key_name : key_columns_names) { ColumnPtr key_col = materializeColumn(block, key_name); - key_col->updateWeakHash32(hash); + hash.update(key_col->getWeakHash32()); } auto selector = hashToSelector(hash, sharder); diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 457ed3ef4a6..0de2bf9cb1f 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -1,5 +1,6 @@ #include +#include #include #include diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 6d3a4f30b34..480c6736bc5 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +43,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Interpreters/MutationsNonDeterministicHelpers.cpp b/src/Interpreters/MutationsNonDeterministicHelpers.cpp index 7a4cb91acc0..bcff3e18b25 100644 --- a/src/Interpreters/MutationsNonDeterministicHelpers.cpp +++ b/src/Interpreters/MutationsNonDeterministicHelpers.cpp @@ -5,6 +5,7 @@ #include #include #include 
+#include #include #include #include diff --git a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h index e8194f0dfe1..b2f55003da5 100644 --- a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h +++ b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h @@ -1,11 +1,10 @@ #pragma once -#include - +#include #include #include -#include +#include namespace DB { diff --git a/src/Interpreters/PredicateExpressionsOptimizer.cpp b/src/Interpreters/PredicateExpressionsOptimizer.cpp index 8dc8c1c92cc..af67e62be1a 100644 --- a/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index b04c8b1b725..43a04f8aff0 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index e63a2ae31aa..527159dc981 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index bbaa7179757..d9cce4b9dd7 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include @@ -22,6 +22,8 @@ namespace ProfileEvents namespace DB { +struct Settings; + /** Allows to log information about queries execution: * - info about start of query execution; diff --git a/src/Interpreters/QueryViewsLog.h b/src/Interpreters/QueryViewsLog.h index 2de06fe3ddc..8f382ba6183 100644 --- a/src/Interpreters/QueryViewsLog.h +++ b/src/Interpreters/QueryViewsLog.h @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/RewriteCountVariantsVisitor.cpp b/src/Interpreters/RewriteCountVariantsVisitor.cpp index f207bc51527..4a541c3765a 100644 --- a/src/Interpreters/RewriteCountVariantsVisitor.cpp +++ b/src/Interpreters/RewriteCountVariantsVisitor.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index fb80b12ee60..5b378087707 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index f8b6a6542cc..488177c3b4f 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,34 +1,47 @@ #include #include +#include "Common/Logger.h" +#include "Common/logger_useful.h" #include +#include namespace DB { + namespace ErrorCodes { extern const int LOGICAL_ERROR; } Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) - : header(header_) - , min_block_size_rows(min_block_size_rows_) + : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) + , header(header_) { } Chunk Squashing::flush() { - return convertToChunk(std::move(chunks_to_merge_vec)); + if (!accumulated) + return {}; + + auto result = convertToChunk(extract()); + chassert(result); + return result; } Chunk Squashing::squash(Chunk && input_chunk) { - if (!input_chunk.hasChunkInfo()) 
+ if (!input_chunk) return Chunk(); - const auto *info = getInfoFromChunk(input_chunk); - return squash(info->chunks); + auto squash_info = input_chunk.getChunkInfos().extract(); + + if (!squash_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); + + return squash(std::move(squash_info->chunks), std::move(input_chunk.getChunkInfos())); } Chunk Squashing::add(Chunk && input_chunk) @@ -37,67 +50,62 @@ Chunk Squashing::add(Chunk && input_chunk) return {}; /// Just read block is already enough. - if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes())) + if (isEnoughSize(input_chunk)) { /// If no accumulated data, return just read block. - if (chunks_to_merge_vec.empty()) + if (!accumulated) { - chunks_to_merge_vec.push_back(std::move(input_chunk)); - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - return res_chunk; + accumulated.add(std::move(input_chunk)); + return convertToChunk(extract()); } /// Return accumulated data (maybe it has small size) and place new block to accumulated data. - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(extract()); + accumulated.add(std::move(input_chunk)); return res_chunk; } /// Accumulated block is already enough. - if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + if (isEnoughSize()) { /// Return accumulated data and place new block to accumulated data. - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(extract()); + accumulated.add(std::move(input_chunk)); return res_chunk; } /// Pushing data into accumulating vector - expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + accumulated.add(std::move(input_chunk)); /// If accumulated data is big enough, we send it - if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) - { - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - changeCurrentSize(0, 0); - chunks_to_merge_vec.clear(); - return res_chunk; - } + if (isEnoughSize()) + return convertToChunk(extract()); + return {}; } -Chunk Squashing::convertToChunk(std::vector && chunks) const +Chunk Squashing::convertToChunk(CurrentData && data) const { - if (chunks.empty()) + if (data.chunks.empty()) return {}; auto info = std::make_shared(); - info->chunks = std::move(chunks); + info->chunks = std::move(data.chunks); - chunks.clear(); + // It is important that the chunk is not empty, it has to have columns even if they are empty + // Sometimes there could be no columns in the header but non-empty rows in the chunks + // That happens when we intend to add defaults for the missing columns afterwards + auto aggr_chunk = Chunk(header.getColumns(), 0); + if (header.columns() == 0) + aggr_chunk = Chunk(header.getColumns(), data.getRows()); - return Chunk(header.cloneEmptyColumns(), 0, info); + aggr_chunk.getChunkInfos().add(std::move(info)); + chassert(aggr_chunk); + return aggr_chunk; } -Chunk Squashing::squash(std::vector & input_chunks) +Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos) { - Chunk
accumulated_chunk; std::vector mutable_columns = {}; size_t rows = 0; for (const Chunk & chunk : input_chunks) @@ -119,35 +127,17 @@ Chunk Squashing::squash(std::vector & input_chunks) for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) { const auto source_column = columns[j]; - mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); } } - accumulated_chunk.setColumns(std::move(mutable_columns), rows); - return accumulated_chunk; -} -const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk) -{ - const auto& info = chunk.getChunkInfo(); - const auto * agg_info = typeid_cast(info.get()); + Chunk result; + result.setColumns(std::move(mutable_columns), rows); + result.setChunkInfos(infos); + result.getChunkInfos().appendIfUniq(std::move(input_chunks.back().getChunkInfos())); - if (!agg_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); - - return agg_info; -} - -void Squashing::expandCurrentSize(size_t rows, size_t bytes) -{ - accumulated_size.rows += rows; - accumulated_size.bytes += bytes; -} - -void Squashing::changeCurrentSize(size_t rows, size_t bytes) -{ - accumulated_size.rows = rows; - accumulated_size.bytes = bytes; + chassert(result); + return result; } bool Squashing::isEnoughSize(size_t rows, size_t bytes) const @@ -156,4 +146,29 @@ bool Squashing::isEnoughSize(size_t rows, size_t bytes) const || (min_block_size_rows && rows >= min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); } + +bool Squashing::isEnoughSize() const +{ + return isEnoughSize(accumulated.getRows(), accumulated.getBytes()); +}; + +bool Squashing::isEnoughSize(const Chunk & chunk) const +{ + return isEnoughSize(chunk.getNumRows(), chunk.bytes()); +} + +void Squashing::CurrentData::add(Chunk && chunk) +{ + rows += chunk.getNumRows(); + bytes += chunk.bytes(); + chunks.push_back(std::move(chunk)); +} + +Squashing::CurrentData Squashing::extract() +{ + auto result = std::move(accumulated); + accumulated = {}; + return result; +} + } diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index d76cca60e41..71ed4c4185a 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -8,9 +8,18 @@ namespace DB { -struct ChunksToSquash : public ChunkInfo +class ChunksToSquash : public ChunkInfoCloneable { - mutable std::vector chunks = {}; +public: + ChunksToSquash() = default; + ChunksToSquash(const ChunksToSquash & other) + { + chunks.reserve(other.chunks.size()); + for (const auto & chunk: other.chunks) + chunks.push_back(chunk.clone()); + } + + std::vector chunks = {}; }; /** Merging consecutive passed blocks to specified minimum size. 
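The reworked Squashing keeps this doc comment accurate but moves the bookkeeping into a small CurrentData holder. The accumulate-until-threshold behaviour itself can be sketched in isolation like this (plain C++ with a fake chunk type, only meant to illustrate the pattern, not the real interface):

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

struct FakeChunk
{
    size_t rows = 0;
    size_t bytes = 0;
};

class Accumulator
{
public:
    Accumulator(size_t min_rows_, size_t min_bytes_) : min_rows(min_rows_), min_bytes(min_bytes_) {}

    /// Returns the buffered batch as soon as a threshold is reached, otherwise an empty vector.
    std::vector<FakeChunk> add(FakeChunk chunk)
    {
        rows += chunk.rows;
        bytes += chunk.bytes;
        buffered.push_back(chunk);

        if ((min_rows && rows >= min_rows) || (min_bytes && bytes >= min_bytes))
            return flush();
        return {};
    }

    /// Hands out whatever is buffered and resets the counters.
    std::vector<FakeChunk> flush()
    {
        rows = 0;
        bytes = 0;
        return std::exchange(buffered, {});
    }

private:
    size_t min_rows;
    size_t min_bytes;
    size_t rows = 0;
    size_t bytes = 0;
    std::vector<FakeChunk> buffered;
};

int main()
{
    Accumulator acc(/*min_rows=*/100, /*min_bytes=*/0);
    std::cout << acc.add({40, 1000}).size() << '\n';  // 0 - still below the row threshold
    std::cout << acc.add({70, 2000}).size() << '\n';  // 2 - 110 rows buffered, batch released
}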
@@ -36,34 +45,37 @@ public: static Chunk squash(Chunk && input_chunk); Chunk flush(); - bool isDataLeft() - { - return !chunks_to_merge_vec.empty(); - } + void setHeader(Block header_) { header = std::move(header_); } + const Block & getHeader() const { return header; } - Block header; private: - struct CurrentSize + struct CurrentData { + std::vector chunks = {}; size_t rows = 0; size_t bytes = 0; + + explicit operator bool () const { return !chunks.empty(); } + size_t getRows() const { return rows; } + size_t getBytes() const { return bytes; } + void add(Chunk && chunk); }; - std::vector chunks_to_merge_vec = {}; - size_t min_block_size_rows; - size_t min_block_size_bytes; + const size_t min_block_size_rows; + const size_t min_block_size_bytes; + Block header; - CurrentSize accumulated_size; + CurrentData accumulated; - static const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); + static Chunk squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos); - static Chunk squash(std::vector & input_chunks); - - void expandCurrentSize(size_t rows, size_t bytes); - void changeCurrentSize(size_t rows, size_t bytes); + bool isEnoughSize() const; bool isEnoughSize(size_t rows, size_t bytes) const; + bool isEnoughSize(const Chunk & chunk) const; - Chunk convertToChunk(std::vector && chunks) const; + CurrentData extract(); + + Chunk convertToChunk(CurrentData && data) const; }; } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 7d84efba1b5..572481e6b12 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -544,7 +544,13 @@ void SystemLog::flushImpl(const std::vector & to_flush, insert_context->makeQueryContext(); addSettingsForQuery(insert_context, IAST::QueryKind::Insert); - InterpreterInsertQuery interpreter(query_ptr, insert_context); + InterpreterInsertQuery interpreter( + query_ptr, + insert_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_insert */ false); BlockIO io = interpreter.execute(); PushingPipelineExecutor executor(io.pipeline); diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 8e83233e54c..ed14fe4273c 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -2,7 +2,6 @@ #include #include -#include #include #include #include diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 6ec6a64b13d..1f7c6b1fe68 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index a3c5a7ed3ed..6ce6f5e454e 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1188,7 +1188,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } - /// Check for dynamic subcolums in unknown required columns. + /// Check for dynamic subcolumns in unknown required columns.
if (!unknown_required_source_columns.empty()) { for (const NameAndTypePair & pair : source_columns_ordinary) diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index 4e1a2bcf5ee..ac80ad2fb68 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,7 @@ #include #include #include + #include diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 9c3f85128cf..1b57ad2b622 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -537,7 +538,7 @@ Chunk DDLQueryStatusSource::generate() ExecutionStatus status(-1, "Cannot obtain error message"); /// Replicated database retries in case of error, it should not write error status. -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD bool need_check_status = true; #else bool need_check_status = !is_replicated_database; diff --git a/src/Interpreters/formatWithPossiblyHidingSecrets.h b/src/Interpreters/formatWithPossiblyHidingSecrets.h index 25e1e7a5616..039bcbc2bca 100644 --- a/src/Interpreters/formatWithPossiblyHidingSecrets.h +++ b/src/Interpreters/formatWithPossiblyHidingSecrets.h @@ -2,6 +2,8 @@ #include "Access/ContextAccess.h" #include "Interpreters/Context.h" +#include + namespace DB { struct SecretHidingFormatSettings diff --git a/src/Interpreters/getColumnFromBlock.cpp b/src/Interpreters/getColumnFromBlock.cpp index 2e70a58b5a1..89166bb2b3e 100644 --- a/src/Interpreters/getColumnFromBlock.cpp +++ b/src/Interpreters/getColumnFromBlock.cpp @@ -40,7 +40,7 @@ ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requ auto subcolumn_name = requested_subcolumn.getSubcolumnName(); /// If requested subcolumn is dynamic, we should first perform cast and then /// extract the subcolumn, because the data of dynamic subcolumn can change after cast. 
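The fix in the hunk below widens the condition so that it also triggers when the requested type, not only the source column, has dynamic subcolumns. The intended rule, sketched as a standalone predicate with assumed flag names (illustrative only, not the ClickHouse API):

#include <iostream>

// Cast the whole column before extracting the subcolumn whenever either side of the
// conversion contains dynamic subcolumns and the types are not already identical.
bool mustCastBeforeSubcolumnExtraction(bool source_has_dynamic_subcolumns,
                                       bool requested_has_dynamic_subcolumns,
                                       bool types_are_equal)
{
    return (source_has_dynamic_subcolumns || requested_has_dynamic_subcolumns) && !types_are_equal;
}

int main()
{
    // The old check missed this case: plain source type, dynamic requested type.
    std::cout << mustCastBeforeSubcolumnExtraction(false, true, false) << '\n';  // 1: cast first
    std::cout << mustCastBeforeSubcolumnExtraction(false, false, false) << '\n'; // 0: extract directly
}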
- if (elem->type->hasDynamicSubcolumns() && !elem->type->equals(*requested_column_type)) + if ((elem->type->hasDynamicSubcolumns() || requested_column_type->hasDynamicSubcolumns()) && !elem->type->equals(*requested_column_type)) { auto casted_column = castColumn({elem->column, elem->type, ""}, requested_column_type); auto elem_column = requested_column_type->tryGetSubcolumn(subcolumn_name, casted_column); diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp index 43be7c5f043..6f902c7cd7e 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include #include diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index 06c5d424d2f..cf18cbbb54a 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/interpretSubquery.cpp b/src/Interpreters/interpretSubquery.cpp index 6b42345f1d6..340f6d1d805 100644 --- a/src/Interpreters/interpretSubquery.cpp +++ b/src/Interpreters/interpretSubquery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 9c3922b8bda..4d17891f9f8 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -384,7 +385,7 @@ static void maybeConvertOrdinaryDatabaseToAtomic(ContextMutablePtr context, cons if (database->getEngineName() != "Ordinary") return; - Strings permanently_detached_tables = database->getNamesOfPermanentlyDetachedTables(); + const Strings permanently_detached_tables = database->getNamesOfPermanentlyDetachedTables(); if (!permanently_detached_tables.empty()) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot automatically convert database {} from Ordinary to Atomic, " diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index 3529863a623..b9fdaabede1 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -21,6 +22,18 @@ extern const int ILLEGAL_COLUMN; } +DataTypeValidationSettings::DataTypeValidationSettings(const DB::Settings& settings) + : allow_suspicious_low_cardinality_types(settings.allow_suspicious_low_cardinality_types) + , allow_experimental_object_type(settings.allow_experimental_object_type) + , allow_suspicious_fixed_string_types(settings.allow_suspicious_fixed_string_types) + , allow_experimental_variant_type(settings.allow_experimental_variant_type) + , allow_suspicious_variant_types(settings.allow_suspicious_variant_types) + , validate_nested_types(settings.validate_experimental_and_suspicious_types_inside_nested_types) + , allow_experimental_dynamic_type(settings.allow_experimental_dynamic_type) +{ +} + + void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidationSettings & settings) { auto validate_callback = [&](const IDataType & data_type) diff --git a/src/Interpreters/parseColumnsListForTableFunction.h b/src/Interpreters/parseColumnsListForTableFunction.h index e2d2bc97ff7..6f15c585e4f 100644 --- 
a/src/Interpreters/parseColumnsListForTableFunction.h +++ b/src/Interpreters/parseColumnsListForTableFunction.h @@ -2,28 +2,19 @@ #include #include -#include namespace DB { class Context; +struct Settings; struct DataTypeValidationSettings { DataTypeValidationSettings() = default; - explicit DataTypeValidationSettings(const Settings & settings) - : allow_suspicious_low_cardinality_types(settings.allow_suspicious_low_cardinality_types) - , allow_experimental_object_type(settings.allow_experimental_object_type) - , allow_suspicious_fixed_string_types(settings.allow_suspicious_fixed_string_types) - , allow_experimental_variant_type(settings.allow_experimental_variant_type) - , allow_suspicious_variant_types(settings.allow_suspicious_variant_types) - , validate_nested_types(settings.validate_experimental_and_suspicious_types_inside_nested_types) - , allow_experimental_dynamic_type(settings.allow_experimental_dynamic_type) - { - } + explicit DataTypeValidationSettings(const Settings & settings); bool allow_suspicious_low_cardinality_types = true; bool allow_experimental_object_type = true; diff --git a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp index dd20164925c..a6595d330fc 100644 --- a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp +++ b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -57,13 +58,13 @@ ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, c return query; if ((isUserDefinedFunctionQuery(query) - && context->getSettings().ignore_on_cluster_for_replicated_udf_queries + && context->getSettingsRef().ignore_on_cluster_for_replicated_udf_queries && context->getUserDefinedSQLObjectsStorage().isReplicated()) || (isAccessControlQuery(query) - && context->getSettings().ignore_on_cluster_for_replicated_access_entities_queries + && context->getSettingsRef().ignore_on_cluster_for_replicated_access_entities_queries && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE)) || (isNamedCollectionQuery(query) - && context->getSettings().ignore_on_cluster_for_replicated_named_collections_queries + && context->getSettingsRef().ignore_on_cluster_for_replicated_named_collections_queries && NamedCollectionFactory::instance().usesReplicatedStorage())) { LOG_DEBUG(getLogger("removeOnClusterClauseIfNeeded"), "ON CLUSTER clause was ignored for query {}", query->getID()); diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index d56a2724914..f0f782c0a63 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include #include @@ -240,12 +242,12 @@ ASTPtr ASTCreateQuery::clone() const res->set(res->columns_list, columns_list->clone()); if (storage) res->set(res->storage, storage->clone()); - if (inner_storage) - res->set(res->inner_storage, inner_storage->clone()); if (select) res->set(res->select, select->clone()); if (table_overrides) res->set(res->table_overrides, table_overrides->clone()); + if (targets) + res->set(res->targets, targets->clone()); if (dictionary) { @@ -265,6 +267,16 @@ ASTPtr ASTCreateQuery::clone() const return res; } +String ASTCreateQuery::getID(char delim) const +{ + String res = attach ? 
"AttachQuery" : "CreateQuery"; + String database = getDatabase(); + if (!database.empty()) + res += (delim + getDatabase()); + res += (delim + getTable()); + return res; +} + void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { frame.need_parens = false; @@ -388,20 +400,18 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat refresh_strategy->formatImpl(settings, state, frame); } - if (to_table_id) + if (auto to_table_id = getTargetTableID(ViewTarget::To)) { - assert((is_materialized_view || is_window_view) && to_inner_uuid == UUIDHelpers::Nil); - settings.ostr - << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : "") - << (!to_table_id.database_name.empty() ? backQuoteIfNeed(to_table_id.database_name) + "." : "") - << backQuoteIfNeed(to_table_id.table_name); + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::TO) + << (settings.hilite ? hilite_none : "") << " " + << (!to_table_id.database_name.empty() ? backQuoteIfNeed(to_table_id.database_name) + "." : "") + << backQuoteIfNeed(to_table_id.table_name); } - if (to_inner_uuid != UUIDHelpers::Nil) + if (auto to_inner_uuid = getTargetInnerUUID(ViewTarget::To); to_inner_uuid != UUIDHelpers::Nil) { - assert(is_materialized_view && !to_table_id); - settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO INNER UUID " << (settings.hilite ? hilite_none : "") - << quoteString(toString(to_inner_uuid)); + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::TO_INNER_UUID) + << (settings.hilite ? hilite_none : "") << " " << quoteString(toString(to_inner_uuid)); } bool should_add_empty = is_create_empty; @@ -461,14 +471,17 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat frame.expression_list_always_start_on_new_line = false; - if (inner_storage) + if (storage) + storage->formatImpl(settings, state, frame); + + if (auto inner_storage = getTargetInnerEngine(ViewTarget::Inner)) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " INNER" << (settings.hilite ? hilite_none : ""); + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::INNER) << (settings.hilite ? 
hilite_none : ""); inner_storage->formatImpl(settings, state, frame); } - if (storage) - storage->formatImpl(settings, state, frame); + if (auto to_storage = getTargetInnerEngine(ViewTarget::To)) + to_storage->formatImpl(settings, state, frame); if (dictionary) dictionary->formatImpl(settings, state, frame); @@ -528,48 +541,57 @@ bool ASTCreateQuery::isParameterizedView() const } -ASTCreateQuery::UUIDs::UUIDs(const ASTCreateQuery & query) - : uuid(query.uuid) - , to_inner_uuid(query.to_inner_uuid) +void ASTCreateQuery::generateRandomUUIDs() { + CreateQueryUUIDs{*this, /* generate_random= */ true}.copyToQuery(*this); } -String ASTCreateQuery::UUIDs::toString() const +void ASTCreateQuery::resetUUIDs() { - WriteBufferFromOwnString out; - out << "{" << uuid << "," << to_inner_uuid << "}"; - return out.str(); + CreateQueryUUIDs{}.copyToQuery(*this); } -ASTCreateQuery::UUIDs ASTCreateQuery::UUIDs::fromString(const String & str) + +StorageID ASTCreateQuery::getTargetTableID(ViewTarget::Kind target_kind) const { - ReadBufferFromString in{str}; - ASTCreateQuery::UUIDs res; - in >> "{" >> res.uuid >> "," >> res.to_inner_uuid >> "}"; - return res; + if (targets) + return targets->getTableID(target_kind); + return StorageID::createEmpty(); } -ASTCreateQuery::UUIDs ASTCreateQuery::generateRandomUUID(bool always_generate_new_uuid) +bool ASTCreateQuery::hasTargetTableID(ViewTarget::Kind target_kind) const { - if (always_generate_new_uuid) - setUUID({}); - - if (uuid == UUIDHelpers::Nil) - uuid = UUIDHelpers::generateV4(); - - /// If destination table (to_table_id) is not specified for materialized view, - /// then MV will create inner table. We should generate UUID of inner table here. - bool need_uuid_for_inner_table = !attach && is_materialized_view && !to_table_id; - if (need_uuid_for_inner_table && (to_inner_uuid == UUIDHelpers::Nil)) - to_inner_uuid = UUIDHelpers::generateV4(); - - return UUIDs{*this}; + if (targets) + return targets->hasTableID(target_kind); + return false; } -void ASTCreateQuery::setUUID(const UUIDs & uuids) +UUID ASTCreateQuery::getTargetInnerUUID(ViewTarget::Kind target_kind) const { - uuid = uuids.uuid; - to_inner_uuid = uuids.to_inner_uuid; + if (targets) + return targets->getInnerUUID(target_kind); + return UUIDHelpers::Nil; +} + +bool ASTCreateQuery::hasInnerUUIDs() const +{ + if (targets) + return targets->hasInnerUUIDs(); + return false; +} + +std::shared_ptr ASTCreateQuery::getTargetInnerEngine(ViewTarget::Kind target_kind) const +{ + if (targets) + return targets->getInnerEngine(target_kind); + return nullptr; +} + +void ASTCreateQuery::setTargetInnerEngine(ViewTarget::Kind target_kind, ASTPtr storage_def) +{ + if (!targets) + set(targets, std::make_shared()); + targets->setInnerEngine(target_kind, storage_def); } } diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 6fbf045915b..a95010aea31 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,7 @@ namespace DB class ASTFunction; class ASTSetQuery; class ASTSelectWithUnionQuery; +struct CreateQueryUUIDs; class ASTStorage : public IAST @@ -101,17 +103,15 @@ public: bool has_uuid{false}; // CREATE TABLE x UUID '...' ASTColumns * columns_list = nullptr; - - StorageID to_table_id = StorageID::createEmpty(); /// For CREATE MATERIALIZED VIEW mv TO table. 
- UUID to_inner_uuid = UUIDHelpers::Nil; /// For materialized view with inner table - ASTStorage * inner_storage = nullptr; /// For window view with inner table ASTStorage * storage = nullptr; + ASTPtr watermark_function; ASTPtr lateness_function; String as_database; String as_table; IAST * as_table_function = nullptr; ASTSelectWithUnionQuery * select = nullptr; + ASTViewTargets * targets = nullptr; IAST * comment = nullptr; ASTPtr sql_security = nullptr; @@ -136,7 +136,7 @@ public: bool create_or_replace{false}; /** Get the text that identifies this element. */ - String getID(char delim) const override { return (attach ? "AttachQuery" : "CreateQuery") + (delim + getDatabase()) + delim + getTable(); } + String getID(char delim) const override; ASTPtr clone() const override; @@ -153,17 +153,26 @@ public: QueryKind getQueryKind() const override { return QueryKind::Create; } - struct UUIDs - { - UUID uuid = UUIDHelpers::Nil; - UUID to_inner_uuid = UUIDHelpers::Nil; - UUIDs() = default; - explicit UUIDs(const ASTCreateQuery & query); - String toString() const; - static UUIDs fromString(const String & str); - }; - UUIDs generateRandomUUID(bool always_generate_new_uuid = false); - void setUUID(const UUIDs & uuids); + /// Generates a random UUID for this create query if it's not specified already. + /// The function also generates random UUIDs for inner target tables if this create query implies that + /// (for example, if it's a `CREATE MATERIALIZED VIEW` query with an inner storage). + void generateRandomUUIDs(); + + /// Removes UUID from this create query. + /// The function also removes UUIDs for inner target tables from this create query (see also generateRandomUUID()). + void resetUUIDs(); + + /// Returns information about a target table. + /// If that information isn't specified in this create query (or even not allowed) then the function returns an empty value. 
+ StorageID getTargetTableID(ViewTarget::Kind target_kind) const; + bool hasTargetTableID(ViewTarget::Kind target_kind) const; + UUID getTargetInnerUUID(ViewTarget::Kind target_kind) const; + bool hasInnerUUIDs() const; + std::shared_ptr getTargetInnerEngine(ViewTarget::Kind target_kind) const; + void setTargetInnerEngine(ViewTarget::Kind target_kind, ASTPtr storage_def); + + bool is_materialized_view_with_external_target() const { return is_materialized_view && hasTargetTableID(ViewTarget::To); } + bool is_materialized_view_with_inner_table() const { return is_materialized_view && !hasTargetTableID(ViewTarget::To); } protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; @@ -171,8 +180,8 @@ protected: void forEachPointerToChild(std::function f) override { f(reinterpret_cast(&columns_list)); - f(reinterpret_cast(&inner_storage)); f(reinterpret_cast(&storage)); + f(reinterpret_cast(&targets)); f(reinterpret_cast(&as_table_function)); f(reinterpret_cast(&select)); f(reinterpret_cast(&comment)); diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index f39229d7566..230d4c778e8 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -19,9 +18,6 @@ #include #include #include -#include - -#include using namespace std::literals; @@ -333,19 +329,23 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format const auto * literal = arguments->children[0]->as(); const auto * function = arguments->children[0]->as(); + const auto * subquery = arguments->children[0]->as(); bool is_tuple = literal && literal->value.getType() == Field::Types::Tuple; - // do not add parentheses for tuple literal, otherwise extra parens will be added `-((3, 7, 3), 1)` -> `-(((3, 7, 3), 1))` + /// Do not add parentheses for tuple literal, otherwise extra parens will be added `-((3, 7, 3), 1)` -> `-(((3, 7, 3), 1))` bool literal_need_parens = literal && !is_tuple; - // negate always requires parentheses, otherwise -(-1) will be printed as --1 - bool inside_parens = name == "negate" && (literal_need_parens || (function && function->name == "negate")); + /// Negate always requires parentheses, otherwise -(-1) will be printed as --1 + /// Also extra parentheses are needed for subqueries, because NOT can be parsed as a function: + /// not(SELECT 1) cannot be parsed, while not((SELECT 1)) can. + bool inside_parens = (name == "negate" && (literal_need_parens || (function && function->name == "negate"))) + || (subquery && name == "not"); /// We DO need parentheses around a single literal /// For example, SELECT (NOT 0) + (NOT 0) cannot be transformed into SELECT NOT 0 + NOT 0, since /// this is equal to SELECT NOT (0 + NOT 0) bool outside_parens = frame.need_parens && !inside_parens; - // do not add extra parentheses for functions inside negate, i.e. -(-toUInt64(-(1))) + /// Do not add extra parentheses for functions inside negate, i.e. -(-toUInt64(-(1))) if (inside_parens) nested_need_parens.need_parens = false; @@ -632,6 +632,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format settings.ostr << ", "; if (arguments->children[i]->as()) settings.ostr << "SETTINGS "; + nested_dont_need_parens.list_element_index = i; arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); } settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? 
hilite_none : ""); @@ -642,12 +643,14 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format { settings.ostr << (settings.hilite ? hilite_operator : "") << ((frame.need_parens && !alias.empty()) ? "tuple" : "") << '(' << (settings.hilite ? hilite_none : ""); + for (size_t i = 0; i < arguments->children.size(); ++i) { if (i != 0) settings.ostr << ", "; if (arguments->children[i]->as()) settings.ostr << "SETTINGS "; + nested_dont_need_parens.list_element_index = i; arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); } settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : ""); @@ -663,6 +666,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format settings.ostr << ", "; if (arguments->children[i]->as()) settings.ostr << "SETTINGS "; + nested_dont_need_parens.list_element_index = i; arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); } settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : ""); diff --git a/src/Parsers/ASTViewTargets.cpp b/src/Parsers/ASTViewTargets.cpp new file mode 100644 index 00000000000..8ee98e704df --- /dev/null +++ b/src/Parsers/ASTViewTargets.cpp @@ -0,0 +1,300 @@ +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + + +std::string_view toString(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return "to"; + case ViewTarget::Inner: return "inner"; + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "{} doesn't support kind {}", __FUNCTION__, kind); +} + +void parseFromString(ViewTarget::Kind & out, std::string_view str) +{ + for (auto kind : magic_enum::enum_values()) + { + if (toString(kind) == str) + { + out = kind; + return; + } + } + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: Unexpected string {}", __FUNCTION__, str); +} + + +std::vector ASTViewTargets::getKinds() const +{ + std::vector kinds; + kinds.reserve(targets.size()); + for (const auto & target : targets) + kinds.push_back(target.kind); + return kinds; +} + + +void ASTViewTargets::setTableID(ViewTarget::Kind kind, const StorageID & table_id_) +{ + for (auto & target : targets) + { + if (target.kind == kind) + { + target.table_id = table_id_; + return; + } + } + if (table_id_) + targets.emplace_back(kind).table_id = table_id_; +} + +StorageID ASTViewTargets::getTableID(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return target->table_id; + return StorageID::createEmpty(); +} + +bool ASTViewTargets::hasTableID(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return !target->table_id.empty(); + return false; +} + +void ASTViewTargets::setCurrentDatabase(const String & current_database) +{ + for (auto & target : targets) + { + auto & table_id = target.table_id; + if (!table_id.table_name.empty() && table_id.database_name.empty()) + table_id.database_name = current_database; + } +} + +void ASTViewTargets::setInnerUUID(ViewTarget::Kind kind, const UUID & inner_uuid_) +{ + for (auto & target : targets) + { + if (target.kind == kind) + { + target.inner_uuid = inner_uuid_; + return; + } + } + if (inner_uuid_ != UUIDHelpers::Nil) + targets.emplace_back(kind).inner_uuid = inner_uuid_; +} + +UUID ASTViewTargets::getInnerUUID(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return 
target->inner_uuid; + return UUIDHelpers::Nil; +} + +bool ASTViewTargets::hasInnerUUID(ViewTarget::Kind kind) const +{ + return getInnerUUID(kind) != UUIDHelpers::Nil; +} + +void ASTViewTargets::resetInnerUUIDs() +{ + for (auto & target : targets) + target.inner_uuid = UUIDHelpers::Nil; +} + +bool ASTViewTargets::hasInnerUUIDs() const +{ + for (const auto & target : targets) + { + if (target.inner_uuid != UUIDHelpers::Nil) + return true; + } + return false; +} + +void ASTViewTargets::setInnerEngine(ViewTarget::Kind kind, ASTPtr storage_def) +{ + auto new_inner_engine = typeid_cast>(storage_def); + if (!new_inner_engine && storage_def) + throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Bad cast from type {} to ASTStorage", storage_def->getID()); + + for (auto & target : targets) + { + if (target.kind == kind) + { + if (target.inner_engine == new_inner_engine) + return; + if (new_inner_engine) + children.push_back(new_inner_engine); + if (target.inner_engine) + std::erase(children, target.inner_engine); + target.inner_engine = new_inner_engine; + return; + } + } + + if (new_inner_engine) + { + targets.emplace_back(kind).inner_engine = new_inner_engine; + children.push_back(new_inner_engine); + } +} + +std::shared_ptr ASTViewTargets::getInnerEngine(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return target->inner_engine; + return nullptr; +} + +std::vector> ASTViewTargets::getInnerEngines() const +{ + std::vector> res; + res.reserve(targets.size()); + for (const auto & target : targets) + { + if (target.inner_engine) + res.push_back(target.inner_engine); + } + return res; +} + +const ViewTarget * ASTViewTargets::tryGetTarget(ViewTarget::Kind kind) const +{ + for (const auto & target : targets) + { + if (target.kind == kind) + return ⌖ + } + return nullptr; +} + +ASTPtr ASTViewTargets::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + for (auto & target : res->targets) + { + if (target.inner_engine) + { + target.inner_engine = typeid_cast>(target.inner_engine->clone()); + res->children.push_back(target.inner_engine); + } + } + return res; +} + +void ASTViewTargets::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const +{ + for (const auto & target : targets) + formatTarget(target, s, state, frame); +} + +void ASTViewTargets::formatTarget(ViewTarget::Kind kind, const FormatSettings & s, FormatState & state, FormatStateStacked frame) const +{ + for (const auto & target : targets) + { + if (target.kind == kind) + formatTarget(target, s, state, frame); + } +} + +void ASTViewTargets::formatTarget(const ViewTarget & target, const FormatSettings & s, FormatState & state, FormatStateStacked frame) +{ + if (target.table_id) + { + auto keyword = getKeywordForTableID(target.kind); + if (!keyword) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No keyword for table name of kind {}", toString(target.kind)); + s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword) + << (s.hilite ? hilite_none : "") << " " + << (!target.table_id.database_name.empty() ? backQuoteIfNeed(target.table_id.database_name) + "." : "") + << backQuoteIfNeed(target.table_id.table_name); + } + + if (target.inner_uuid != UUIDHelpers::Nil) + { + auto keyword = getKeywordForInnerUUID(target.kind); + if (!keyword) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No prefix keyword for inner UUID of kind {}", toString(target.kind)); + s.ostr << " " << (s.hilite ? 
hilite_keyword : "") << toStringView(*keyword) + << (s.hilite ? hilite_none : "") << " " << quoteString(toString(target.inner_uuid)); + } + + if (target.inner_engine) + { + auto keyword = getKeywordForInnerStorage(target.kind); + if (!keyword) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No prefix keyword for table engine of kind {}", toString(target.kind)); + s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword) << (s.hilite ? hilite_none : ""); + target.inner_engine->formatImpl(s, state, frame); + } +} + +std::optional ASTViewTargets::getKeywordForTableID(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return Keyword::TO; /// TO mydb.mydata + case ViewTarget::Inner: return std::nullopt; + } + UNREACHABLE(); +} + +std::optional ASTViewTargets::getKeywordForInnerStorage(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return std::nullopt; /// ENGINE = MergeTree() + case ViewTarget::Inner: return Keyword::INNER; /// INNER ENGINE = MergeTree() + } + UNREACHABLE(); +} + +std::optional ASTViewTargets::getKeywordForInnerUUID(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return Keyword::TO_INNER_UUID; /// TO INNER UUID 'XXX' + case ViewTarget::Inner: return std::nullopt; + } + UNREACHABLE(); +} + +void ASTViewTargets::forEachPointerToChild(std::function f) +{ + for (auto & target : targets) + { + if (target.inner_engine) + { + ASTStorage * new_inner_engine = target.inner_engine.get(); + f(reinterpret_cast(&new_inner_engine)); + if (new_inner_engine != target.inner_engine.get()) + { + if (new_inner_engine) + target.inner_engine = typeid_cast>(new_inner_engine->ptr()); + else + target.inner_engine.reset(); + } + } + } +} + +} diff --git a/src/Parsers/ASTViewTargets.h b/src/Parsers/ASTViewTargets.h new file mode 100644 index 00000000000..12182919f0e --- /dev/null +++ b/src/Parsers/ASTViewTargets.h @@ -0,0 +1,115 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class ASTStorage; +enum class Keyword : size_t; + +/// Information about target tables (external or inner) of a materialized view or a window view. +/// See ASTViewTargets for more details. +struct ViewTarget +{ + enum Kind + { + /// If `kind == ViewTarget::To` then `ViewTarget` contains information about the "TO" table of a materialized view or a window view: + /// CREATE MATERIALIZED VIEW db.mv_name {TO [db.]to_target | ENGINE to_engine} AS SELECT ... + /// or + /// CREATE WINDOW VIEW db.wv_name {TO [db.]to_target | ENGINE to_engine} AS SELECT ... + To, + + /// If `kind == ViewTarget::Inner` then `ViewTarget` contains information about the "INNER" table of a window view: + /// CREATE WINDOW VIEW db.wv_name {INNER ENGINE inner_engine} AS SELECT ... + Inner, + }; + + Kind kind = To; + + /// StorageID of the target table, if it's not inner. + /// That storage ID can be seen for example after "TO" in a statement like CREATE MATERIALIZED VIEW ... TO ... + StorageID table_id = StorageID::createEmpty(); + + /// UUID of the target table, if it's inner. + /// The UUID is calculated automatically and can be seen for example after "TO INNER UUID" in a statement like + /// CREATE MATERIALIZED VIEW ... TO INNER UUID ... + UUID inner_uuid = UUIDHelpers::Nil; + + /// Table engine of the target table, if it's inner. + /// That engine can be seen for example after "ENGINE" in a statement like CREATE MATERIALIZED VIEW ... ENGINE ... + std::shared_ptr inner_engine; +}; + +/// Converts ViewTarget::Kind to a string. 
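The toString / parseFromString pair declared next keeps serialization and parsing of ViewTarget::Kind in sync by probing every known enum value against the serialized form. A compact standalone sketch of the same round trip, using a plain array of values instead of magic_enum:

#include <array>
#include <optional>
#include <stdexcept>
#include <string_view>

enum class Kind { To, Inner };

std::string_view toString(Kind kind)
{
    switch (kind)
    {
        case Kind::To: return "to";
        case Kind::Inner: return "inner";
    }
    throw std::logic_error("unexpected Kind");
}

std::optional<Kind> parseFromString(std::string_view str)
{
    /// Probe every known value through toString so parse and format cannot drift apart.
    for (Kind kind : std::array{Kind::To, Kind::Inner})
        if (toString(kind) == str)
            return kind;
    return std::nullopt;   /// the real function reports BAD_ARGUMENTS here instead
}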
+std::string_view toString(ViewTarget::Kind kind); +void parseFromString(ViewTarget::Kind & out, std::string_view str); + + +/// Information about all target tables (external or inner) of a view. +/// +/// For example, for a materialized view: +/// CREATE MATERIALIZED VIEW db.mv_name [TO [db.]to_target | ENGINE to_engine] AS SELECT ... +/// this class contains information about the "TO" table: its name and database (if it's external), its UUID and engine (if it's inner). +/// +/// For a window view: +/// CREATE WINDOW VIEW db.wv_name [TO [db.]to_target | ENGINE to_engine] [INNER ENGINE inner_engine] AS SELECT ... +/// this class contains information about both the "TO" table and the "INNER" table. +class ASTViewTargets : public IAST +{ +public: + std::vector targets; + + /// Sets the StorageID of the target table, if it's not inner. + /// That storage ID can be seen for example after "TO" in a statement like CREATE MATERIALIZED VIEW ... TO ... + void setTableID(ViewTarget::Kind kind, const StorageID & table_id_); + StorageID getTableID(ViewTarget::Kind kind) const; + bool hasTableID(ViewTarget::Kind kind) const; + + /// Replaces an empty database in the StorageID of the target table with a specified database. + void setCurrentDatabase(const String & current_database); + + /// Sets the UUID of the target table, if it's inner. + /// The UUID is calculated automatically and can be seen for example after "TO INNER UUID" in a statement like + /// CREATE MATERIALIZED VIEW ... TO INNER UUID ... + void setInnerUUID(ViewTarget::Kind kind, const UUID & inner_uuid_); + UUID getInnerUUID(ViewTarget::Kind kind) const; + bool hasInnerUUID(ViewTarget::Kind kind) const; + + void resetInnerUUIDs(); + bool hasInnerUUIDs() const; + + /// Sets the table engine of the target table, if it's inner. + /// That engine can be seen for example after "ENGINE" in a statement like CREATE MATERIALIZED VIEW ... ENGINE ... + void setInnerEngine(ViewTarget::Kind kind, ASTPtr storage_def); + std::shared_ptr getInnerEngine(ViewTarget::Kind kind) const; + std::vector> getInnerEngines() const; + + /// Returns a list of all kinds of views in this ASTViewTargets. + std::vector getKinds() const; + + /// Returns information about a target table. + /// The function returns null if such target doesn't exist. + const ViewTarget * tryGetTarget(ViewTarget::Kind kind) const; + + String getID(char) const override { return "ViewTargets"; } + + ASTPtr clone() const override; + + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; + + /// Formats information only about a specific target table. + void formatTarget(ViewTarget::Kind kind, const FormatSettings & s, FormatState & state, FormatStateStacked frame) const; + static void formatTarget(const ViewTarget & target, const FormatSettings & s, FormatState & state, FormatStateStacked frame); + + /// Helper functions for class ParserViewTargets. Returns a prefix keyword matching a specified target kind. 
+ static std::optional getKeywordForTableID(ViewTarget::Kind kind); + static std::optional getKeywordForInnerUUID(ViewTarget::Kind kind); + static std::optional getKeywordForInnerStorage(ViewTarget::Kind kind); + +protected: + void forEachPointerToChild(std::function f) override; +}; + +} diff --git a/src/Parsers/CreateQueryUUIDs.cpp b/src/Parsers/CreateQueryUUIDs.cpp new file mode 100644 index 00000000000..4dfee67b537 --- /dev/null +++ b/src/Parsers/CreateQueryUUIDs.cpp @@ -0,0 +1,168 @@ +#include + +#include +#include +#include +#include + + +namespace DB +{ + +CreateQueryUUIDs::CreateQueryUUIDs(const ASTCreateQuery & query, bool generate_random, bool force_random) +{ + if (!generate_random || !force_random) + { + uuid = query.uuid; + if (query.targets) + { + for (const auto & target : query.targets->targets) + setTargetInnerUUID(target.kind, target.inner_uuid); + } + } + + if (generate_random) + { + if (uuid == UUIDHelpers::Nil) + uuid = UUIDHelpers::generateV4(); + + /// For an ATTACH query we should never generate UUIDs for its inner target tables + /// because for an ATTACH query those inner target tables probably already exist and can be accessible by names. + /// If we generate random UUIDs for already existing tables then those UUIDs will not be correct making those inner target table inaccessible. + /// Thus it's not safe for example to replace + /// "ATTACH MATERIALIZED VIEW mv AS SELECT a FROM b" with + /// "ATTACH MATERIALIZED VIEW mv TO INNER UUID "XXXX" AS SELECT a FROM b" + /// This replacement is safe only for CREATE queries when inner target tables don't exist yet. + if (!query.attach) + { + auto generate_target_uuid = [&](ViewTarget::Kind target_kind) + { + if ((query.getTargetInnerUUID(target_kind) == UUIDHelpers::Nil) && query.getTargetTableID(target_kind).empty()) + setTargetInnerUUID(target_kind, UUIDHelpers::generateV4()); + }; + + /// If destination table (to_table_id) is not specified for materialized view, + /// then MV will create inner table. We should generate UUID of inner table here. 
+ if (query.is_materialized_view) + generate_target_uuid(ViewTarget::To); + } + } +} + +bool CreateQueryUUIDs::empty() const +{ + if (uuid != UUIDHelpers::Nil) + return false; + for (const auto & [_, inner_uuid] : targets_inner_uuids) + { + if (inner_uuid != UUIDHelpers::Nil) + return false; + } + return true; +} + +String CreateQueryUUIDs::toString() const +{ + WriteBufferFromOwnString out; + out << "{"; + bool need_comma = false; + auto add_name_and_uuid_to_string = [&](std::string_view name_, const UUID & uuid_) + { + if (std::exchange(need_comma, true)) + out << ", "; + out << "\"" << name_ << "\": \"" << uuid_ << "\""; + }; + if (uuid != UUIDHelpers::Nil) + add_name_and_uuid_to_string("uuid", uuid); + for (const auto & [kind, inner_uuid] : targets_inner_uuids) + { + if (inner_uuid != UUIDHelpers::Nil) + add_name_and_uuid_to_string(::DB::toString(kind), inner_uuid); + } + out << "}"; + return out.str(); +} + +CreateQueryUUIDs CreateQueryUUIDs::fromString(const String & str) +{ + ReadBufferFromString in{str}; + CreateQueryUUIDs res; + skipWhitespaceIfAny(in); + in >> "{"; + skipWhitespaceIfAny(in); + char c; + while (in.peek(c) && c != '}') + { + String name; + String value; + readDoubleQuotedString(name, in); + skipWhitespaceIfAny(in); + in >> ":"; + skipWhitespaceIfAny(in); + readDoubleQuotedString(value, in); + skipWhitespaceIfAny(in); + if (name == "uuid") + { + res.uuid = parse(value); + } + else + { + ViewTarget::Kind kind; + parseFromString(kind, name); + res.setTargetInnerUUID(kind, parse(value)); + } + if (in.peek(c) && c == ',') + { + in.ignore(1); + skipWhitespaceIfAny(in); + } + } + in >> "}"; + return res; +} + +void CreateQueryUUIDs::setTargetInnerUUID(ViewTarget::Kind kind, const UUID & new_inner_uuid) +{ + for (auto & pair : targets_inner_uuids) + { + if (pair.first == kind) + { + pair.second = new_inner_uuid; + return; + } + } + if (new_inner_uuid != UUIDHelpers::Nil) + targets_inner_uuids.emplace_back(kind, new_inner_uuid); +} + +UUID CreateQueryUUIDs::getTargetInnerUUID(ViewTarget::Kind kind) const +{ + for (const auto & pair : targets_inner_uuids) + { + if (pair.first == kind) + return pair.second; + } + return UUIDHelpers::Nil; +} + +void CreateQueryUUIDs::copyToQuery(ASTCreateQuery & query) const +{ + query.uuid = uuid; + + if (query.targets) + query.targets->resetInnerUUIDs(); + + if (!targets_inner_uuids.empty()) + { + if (!query.targets) + query.set(query.targets, std::make_shared()); + + for (const auto & [kind, inner_uuid] : targets_inner_uuids) + { + if (inner_uuid != UUIDHelpers::Nil) + query.targets->setInnerUUID(kind, inner_uuid); + } + } +} + +} diff --git a/src/Parsers/CreateQueryUUIDs.h b/src/Parsers/CreateQueryUUIDs.h new file mode 100644 index 00000000000..419dad24b35 --- /dev/null +++ b/src/Parsers/CreateQueryUUIDs.h @@ -0,0 +1,40 @@ +#pragma once + +#include + + +namespace DB +{ +class ASTCreateQuery; + +/// The UUID of a table or a database defined with a CREATE QUERY along with the UUIDs of its inner targets. +struct CreateQueryUUIDs +{ + CreateQueryUUIDs() = default; + + /// Collect UUIDs from ASTCreateQuery. + /// Parameters: + /// `generate_random` - if it's true then unspecified in the query UUIDs will be generated randomly; + /// `force_random` - if it's true then all UUIDs (even specified in the query) will be (re)generated randomly. 
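CreateQueryUUIDs::toString / fromString above persist the collected UUIDs as one small JSON-like object, writing only non-nil entries under the key "uuid" or a target-kind name. A standalone sketch of that encoding, with std::map and plain strings standing in for the real UUID type and the per-kind vector:

#include <map>
#include <sstream>
#include <string>

std::string uuidsToString(const std::map<std::string, std::string> & uuids)
{
    std::ostringstream out;
    out << "{";
    bool need_comma = false;
    for (const auto & [name, value] : uuids)
    {
        if (value.empty())             /// nil UUIDs are simply omitted, as in the real toString()
            continue;
        if (need_comma)
            out << ", ";
        need_comma = true;
        out << "\"" << name << "\": \"" << value << "\"";
    }
    out << "}";
    return out.str();
}

/// uuidsToString({{"uuid", "11..."}, {"to", "22..."}}) yields {"to": "22...", "uuid": "11..."};
/// fromString() walks the same shape back, sending the "uuid" key to the table UUID and every
/// other key through parseFromString(ViewTarget::Kind &, ...) to an inner-target UUID.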
+ explicit CreateQueryUUIDs(const ASTCreateQuery & query, bool generate_random = false, bool force_random = false); + + bool empty() const; + explicit operator bool() const { return !empty(); } + + String toString() const; + static CreateQueryUUIDs fromString(const String & str); + + void setTargetInnerUUID(ViewTarget::Kind kind, const UUID & new_inner_uuid); + UUID getTargetInnerUUID(ViewTarget::Kind kind) const; + + /// Copies UUIDs to ASTCreateQuery. + void copyToQuery(ASTCreateQuery & query) const; + + /// UUID of the table. + UUID uuid = UUIDHelpers::Nil; + + /// UUIDs of its target table (or tables). + std::vector> targets_inner_uuids; +}; + +} diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index fff8383e7b3..f97c042e91e 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2743,7 +2743,7 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po /// 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator if (op.function_name == "and" && layers.back()->between_counter) { - layers.back()->between_counter--; + --layers.back()->between_counter; op = finish_between_operator; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 014dc7bd3bf..41379a845e7 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -693,7 +694,8 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ASTPtr table; ASTPtr columns_list; - ASTPtr storage; + std::shared_ptr storage; + ASTPtr targets; ASTPtr as_database; ASTPtr as_table; ASTPtr as_table_function; @@ -773,6 +775,17 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return true; } + auto parse_storage = [&] + { + chassert(!storage); + ASTPtr ast; + if (!storage_p.parse(pos, ast, expected)) + return false; + + storage = typeid_cast>(ast); + return true; + }; + auto need_parse_as_select = [&is_create_empty, &pos, &expected]() { if (ParserKeyword{Keyword::EMPTY_AS}.ignore(pos, expected)) @@ -798,7 +811,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!s_rparen.ignore(pos, expected)) return false; - auto storage_parse_result = storage_p.parse(pos, storage, expected); + auto storage_parse_result = parse_storage(); if ((storage_parse_result || is_temporary) && need_parse_as_select()) { @@ -820,7 +833,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe */ else { - storage_p.parse(pos, storage, expected); + parse_storage(); /// CREATE|ATTACH TABLE ... AS ... if (need_parse_as_select()) @@ -843,7 +856,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe /// Optional - ENGINE can be specified. 
if (!storage) - storage_p.parse(pos, storage, expected); + parse_storage(); } } } @@ -904,6 +917,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); + query->set(query->targets, targets); query->is_create_empty = is_create_empty; if (from_path) @@ -977,6 +991,13 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e return false; } + std::shared_ptr targets; + if (to_table) + { + targets = std::make_shared(); + targets->setTableID(ViewTarget::To, to_table->as()->getTableId()); + } + /// Optional - a list of columns can be specified. It must fully comply with SELECT. if (s_lparen.ignore(pos, expected)) { @@ -1017,14 +1038,12 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e if (query->table) query->children.push_back(query->table); - if (to_table) - query->to_table_id = to_table->as()->getTableId(); - query->set(query->columns_list, columns_list); tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); + query->set(query->targets, targets); if (comment) query->set(query->comment, comment); @@ -1139,6 +1158,18 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & storage_p.parse(pos, storage, expected); } + std::shared_ptr targets; + if (to_table || storage || inner_storage) + { + targets = std::make_shared(); + if (to_table) + targets->setTableID(ViewTarget::To, to_table->as()->getTableId()); + if (storage) + targets->setInnerEngine(ViewTarget::To, storage); + if (inner_storage) + targets->setInnerEngine(ViewTarget::Inner, inner_storage); + } + // WATERMARK if (s_watermark.ignore(pos, expected)) { @@ -1195,12 +1226,8 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & if (query->table) query->children.push_back(query->table); - if (to_table) - query->to_table_id = to_table->as()->getTableId(); - query->set(query->columns_list, columns_list); - query->set(query->storage, storage); - query->set(query->inner_storage, inner_storage); + query->is_watermark_strictly_ascending = is_watermark_strictly_ascending; query->is_watermark_ascending = is_watermark_ascending; query->is_watermark_bounded = is_watermark_bounded; @@ -1213,6 +1240,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); + query->set(query->targets, targets); return true; } @@ -1436,6 +1464,7 @@ bool ParserCreateDatabaseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e return true; } + bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_create(Keyword::CREATE); @@ -1622,13 +1651,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (query->table) query->children.push_back(query->table); - if (to_table) - query->to_table_id = to_table->as()->getTableId(); - if (to_inner_uuid) - query->to_inner_uuid = parseFromString(to_inner_uuid->as()->value.get()); - query->set(query->columns_list, columns_list); - query->set(query->storage, storage); + if (refresh_strategy) query->set(query->refresh_strategy, refresh_strategy); if (comment) @@ -1639,29 +1663,41 @@ bool 
ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (query->columns_list && query->columns_list->primary_key) { /// If engine is not set will use default one - if (!query->storage) - query->set(query->storage, std::make_shared()); - else if (query->storage->primary_key) + if (!storage) + storage = std::make_shared(); + auto & storage_ref = typeid_cast(*storage); + if (storage_ref.primary_key) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); - - query->storage->primary_key = query->columns_list->primary_key; - + storage_ref.primary_key = query->columns_list->primary_key; } if (query->columns_list && (query->columns_list->primary_key_from_columns)) { /// If engine is not set will use default one - if (!query->storage) - query->set(query->storage, std::make_shared()); - else if (query->storage->primary_key) + if (!storage) + storage = std::make_shared(); + auto & storage_ref = typeid_cast(*storage); + if (storage_ref.primary_key) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); + storage_ref.primary_key = query->columns_list->primary_key_from_columns; + } - query->storage->primary_key = query->columns_list->primary_key_from_columns; + std::shared_ptr targets; + if (to_table || to_inner_uuid || storage) + { + targets = std::make_shared(); + if (to_table) + targets->setTableID(ViewTarget::To, to_table->as()->getTableId()); + if (to_inner_uuid) + targets->setInnerUUID(ViewTarget::To, parseFromString(to_inner_uuid->as()->value.safeGet())); + if (storage) + targets->setInnerEngine(ViewTarget::To, storage); } tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); + query->set(query->targets, targets); return true; } diff --git a/src/Parsers/ParserViewTargets.cpp b/src/Parsers/ParserViewTargets.cpp new file mode 100644 index 00000000000..8f010882cdd --- /dev/null +++ b/src/Parsers/ParserViewTargets.cpp @@ -0,0 +1,88 @@ +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ + +ParserViewTargets::ParserViewTargets() +{ + for (auto kind : magic_enum::enum_values()) + accept_kinds.push_back(kind); +} + +bool ParserViewTargets::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserStringLiteral literal_p; + ParserStorage storage_p{ParserStorage::TABLE_ENGINE}; + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); + + std::shared_ptr res; + + auto result = [&] -> ASTViewTargets & + { + if (!res) + res = std::make_shared(); + return *res; + }; + + for (;;) + { + auto start = pos; + for (auto kind : accept_kinds) + { + auto current = pos; + + auto keyword = ASTViewTargets::getKeywordForInnerUUID(kind); + if (keyword && ParserKeyword{*keyword}.ignore(pos, expected)) + { + ASTPtr ast; + if (literal_p.parse(pos, ast, expected)) + { + result().setInnerUUID(kind, parseFromString(ast->as()->value.safeGet())); + break; + } + } + pos = current; + + keyword = ASTViewTargets::getKeywordForInnerStorage(kind); + if (keyword && ParserKeyword{*keyword}.ignore(pos, expected)) + { + ASTPtr ast; + if (storage_p.parse(pos, ast, expected)) + { + result().setInnerEngine(kind, ast); + break; + } + } + pos = current; + + keyword = ASTViewTargets::getKeywordForTableID(kind); + if (keyword && ParserKeyword{*keyword}.ignore(pos, expected)) + { + ASTPtr ast; + if (table_name_p.parse(pos, ast, expected)) + { + result().setTableID(kind, 
ast->as()->getTableId()); + break; + } + } + pos = current; + } + if (pos == start) + break; + } + + if (!res || res->targets.empty()) + return false; + + node = res; + return true; +} + +} diff --git a/src/Parsers/ParserViewTargets.h b/src/Parsers/ParserViewTargets.h new file mode 100644 index 00000000000..3af3c0b8df3 --- /dev/null +++ b/src/Parsers/ParserViewTargets.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/// Parses information about target tables (external or inner) of a materialized view or a window view. +/// The function parses one or multiple parts of a CREATE query looking like this: +/// TO db.table_name +/// TO INNER UUID 'XXX' +/// {ENGINE / INNER ENGINE} TableEngine(arguments) [ORDER BY ...] [SETTINGS ...] +/// Returns ASTViewTargets if succeeded. +class ParserViewTargets : public IParserBase +{ +public: + ParserViewTargets(); + explicit ParserViewTargets(const std::vector & accept_kinds_) : accept_kinds(accept_kinds_) { } + +protected: + const char * getName() const override { return "ViewTargets"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + + std::vector accept_kinds; +}; + +} diff --git a/src/Parsers/isUnquotedIdentifier.cpp b/src/Parsers/isUnquotedIdentifier.cpp index 6f2442635ec..26cb3992a50 100644 --- a/src/Parsers/isUnquotedIdentifier.cpp +++ b/src/Parsers/isUnquotedIdentifier.cpp @@ -1,5 +1,6 @@ #include +#include #include namespace DB @@ -7,6 +8,18 @@ namespace DB bool isUnquotedIdentifier(const String & name) { + auto is_keyword = [&name](Keyword keyword) + { + auto s = toStringView(keyword); + if (name.size() != s.size()) + return false; + return strncasecmp(s.data(), name.data(), s.size()) == 0; + }; + + /// Special keywords are parsed as literals instead of identifiers. + if (is_keyword(Keyword::NULL_KEYWORD) || is_keyword(Keyword::TRUE_KEYWORD) || is_keyword(Keyword::FALSE_KEYWORD)) + return false; + Lexer lexer(name.data(), name.data() + name.size()); auto maybe_ident = lexer.nextToken(); diff --git a/src/Parsers/isUnquotedIdentifier.h b/src/Parsers/isUnquotedIdentifier.h index 839e5860ad3..9c9f9239eb3 100644 --- a/src/Parsers/isUnquotedIdentifier.h +++ b/src/Parsers/isUnquotedIdentifier.h @@ -5,6 +5,14 @@ namespace DB { +/// Checks if the input string @name is a valid unquoted identifier. 
+/// +/// Example Usage: +/// abc -> true (valid unquoted identifier) +/// 123 -> false (identifiers cannot start with digits) +/// `123` -> false (quoted identifiers are not considered) +/// `abc` -> false (quoted identifiers are not considered) +/// null -> false (reserved literal keyword) bool isUnquotedIdentifier(const String & name); } diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index 52a0d748d63..9cac4e61380 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 49c4e264f7d..a35ba69d459 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -3,7 +3,8 @@ #include #include #include -#include +#include +#include #include #include @@ -40,6 +41,7 @@ #include #include +#include #include #include @@ -743,7 +745,12 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, { auto & interpolate_node_typed = interpolate_node->as(); - PlannerActionsVisitor planner_actions_visitor(planner_context); + PlannerActionsVisitor planner_actions_visitor( + planner_context, + /* use_column_identifier_as_action_node_name_, (default value)*/ true, + /// Prefer the INPUT to CONSTANT nodes (actions must be non constant) + /* always_use_const_column_for_constant_nodes */ false); + auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag, interpolate_node_typed.getExpression()); if (expression_to_interpolate_expression_nodes.size() != 1) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 7a12d5d690d..6fd37df11c6 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -30,6 +30,8 @@ #include #include +#include + namespace DB { @@ -485,16 +487,33 @@ public: return node; } - const ActionsDAG::Node * addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column) + [[nodiscard]] String addConstantIfNecessary( + const std::string & node_name, const ColumnWithTypeAndName & column, bool always_use_const_column_for_constant_nodes) { + chassert(column.column != nullptr); auto it = node_name_to_node.find(node_name); + if (it != node_name_to_node.end() && (!always_use_const_column_for_constant_nodes || it->second->column)) + return {node_name}; + if (it != node_name_to_node.end()) - return it->second; + { + /// There is a node with this name, but it doesn't have a column + /// This likely happens because we executed the query until WithMergeableState with a const node in the + /// WHERE clause and, as the results of headers are materialized, the column was removed + /// Let's add a new column and keep this + String dupped_name{node_name + "_dupped"}; + if (node_name_to_node.find(dupped_name) != node_name_to_node.end()) + return dupped_name; + + const auto * node = &actions_dag.addColumn(column); + node_name_to_node[dupped_name] = node; + return dupped_name; + } const auto * node = &actions_dag.addColumn(column); node_name_to_node[node->result_name] = node; - return node; + return {node_name}; } template @@ -523,7 +542,7 @@ public: } private: - std::unordered_map node_name_to_node; + std::unordered_map node_name_to_node; ActionsDAG & actions_dag; QueryTreeNodePtr scope_node; }; @@ -531,9 +550,11 @@ private: class PlannerActionsVisitorImpl { public: - PlannerActionsVisitorImpl(ActionsDAG & actions_dag, + PlannerActionsVisitorImpl( + ActionsDAG & 
actions_dag, const PlannerContextPtr & planner_context_, - bool use_column_identifier_as_action_node_name_); + bool use_column_identifier_as_action_node_name_, + bool always_use_const_column_for_constant_nodes_); ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node); @@ -593,14 +614,18 @@ private: const PlannerContextPtr planner_context; ActionNodeNameHelper action_node_name_helper; bool use_column_identifier_as_action_node_name; + bool always_use_const_column_for_constant_nodes; }; -PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAG & actions_dag, +PlannerActionsVisitorImpl::PlannerActionsVisitorImpl( + ActionsDAG & actions_dag, const PlannerContextPtr & planner_context_, - bool use_column_identifier_as_action_node_name_) + bool use_column_identifier_as_action_node_name_, + bool always_use_const_column_for_constant_nodes_) : planner_context(planner_context_) , action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_) , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) + , always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_) { actions_stack.emplace_back(actions_dag, nullptr); } @@ -723,17 +748,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi column.type = constant_type; column.column = column.type->createColumnConst(1, constant_literal); - actions_stack[0].addConstantIfNecessary(constant_node_name, column); + String final_name = actions_stack[0].addConstantIfNecessary(constant_node_name, column, always_use_const_column_for_constant_nodes); size_t actions_stack_size = actions_stack.size(); for (size_t i = 1; i < actions_stack_size; ++i) { auto & actions_stack_node = actions_stack[i]; - actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column); + actions_stack_node.addInputConstantColumnIfNecessary(final_name, column); } - return {constant_node_name, Levels(0)}; - + return {final_name, Levels(0)}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitLambda(const QueryTreeNodePtr & node) @@ -862,16 +886,16 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma else column.column = std::move(column_set); - actions_stack[0].addConstantIfNecessary(column.name, column); + String final_name = actions_stack[0].addConstantIfNecessary(column.name, column, always_use_const_column_for_constant_nodes); size_t actions_stack_size = actions_stack.size(); for (size_t i = 1; i < actions_stack_size; ++i) { auto & actions_stack_node = actions_stack[i]; - actions_stack_node.addInputConstantColumnIfNecessary(column.name, column); + actions_stack_node.addInputConstantColumnIfNecessary(final_name, column); } - return {column.name, Levels(0)}; + return {final_name, Levels(0)}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node) @@ -1008,14 +1032,19 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi } -PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_) +PlannerActionsVisitor::PlannerActionsVisitor( + const PlannerContextPtr & planner_context_, + bool use_column_identifier_as_action_node_name_, + bool always_use_const_column_for_constant_nodes_) : planner_context(planner_context_) , 
use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) + , always_use_const_column_for_constant_nodes(always_use_const_column_for_constant_nodes_) {} ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node) { - PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context, use_column_identifier_as_action_node_name); + PlannerActionsVisitorImpl actions_visitor_impl( + actions_dag, planner_context, use_column_identifier_as_action_node_name, always_use_const_column_for_constant_nodes); return actions_visitor_impl.visit(expression_node); } diff --git a/src/Planner/PlannerActionsVisitor.h b/src/Planner/PlannerActionsVisitor.h index 6bb32047327..1dbd149bc4b 100644 --- a/src/Planner/PlannerActionsVisitor.h +++ b/src/Planner/PlannerActionsVisitor.h @@ -27,11 +27,17 @@ using PlannerContextPtr = std::shared_ptr; * During actions build, there is special handling for following functions: * 1. Aggregate functions are added in actions dag as INPUT nodes. Aggregate functions arguments are not added. * 2. For function `in` and its variants, already collected sets from planner context are used. + * 3. When building actions that use CONSTANT nodes, by default we ignore pre-existing INPUTs if those don't have + * a column (a const column always has a column). This is for compatibility with previous headers. We disable this + * behaviour when we explicitly want to override CONSTANT nodes with the input (resolving InterpolateNode for example) */ class PlannerActionsVisitor { public: - explicit PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_ = true); + explicit PlannerActionsVisitor( + const PlannerContextPtr & planner_context_, + bool use_column_identifier_as_action_node_name_ = true, + bool always_use_const_column_for_constant_nodes_ = true); /** Add actions necessary to calculate expression node into expression dag. * Necessary actions are not added in actions dag output. @@ -42,6 +48,7 @@ public: private: const PlannerContextPtr planner_context; bool use_column_identifier_as_action_node_name = true; + bool always_use_const_column_for_constant_nodes = true; }; /** Calculate query tree expression node action dag name and add them into node to name map. 
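addConstantIfNecessary now returns the name the caller has to use, because a constant may be registered under a fresh "<name>_dupped" alias when the DAG already contains a same-named INPUT without a column. A standalone sketch of that collision rule, with an unordered_map standing in for the ActionsDAG:

#include <string>
#include <unordered_map>

class ConstantRegistrySketch
{
public:
    /// INPUT nodes carry no materialized column; they are what a query executed up to
    /// WithMergeableState leaves behind in the header.
    void addInput(const std::string & name) { nodes.emplace(name, false); }

    /// Returns the name under which the constant ended up; callers must propagate it,
    /// exactly like the final_name threaded through visitConstant / makeSetForInFunction.
    std::string addConstantIfNecessary(const std::string & name, bool require_const_column)
    {
        auto it = nodes.find(name);
        if (it != nodes.end() && (!require_const_column || it->second))
            return name;                           /// reuse the node that is already registered

        if (it != nodes.end())
        {
            std::string dupped_name = name + "_dupped";
            if (!nodes.contains(dupped_name))
                nodes.emplace(dupped_name, true);  /// add the const column under a fresh alias
            return dupped_name;
        }

        nodes.emplace(name, true);
        return name;
    }

private:
    std::unordered_map<std::string, bool> nodes;   /// node name -> has a const column attached
};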
diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index ceb506d1bbb..2b67c96d843 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace DB { @@ -85,14 +87,14 @@ bool canRemoveConstantFromGroupByKey(const ConstantNode & root) else if (function_node) { /// Do not allow removing constants like `hostName()` - if (!function_node->getFunctionOrThrow()->isDeterministic()) + if (function_node->getFunctionOrThrow()->isServerConstant()) return false; for (const auto & child : function_node->getArguments()) nodes.push(child.get()); } - else - return false; + // else + // return false; } return true; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index d18342880f0..4fedacbb48a 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include @@ -75,7 +77,6 @@ namespace ErrorCodes extern const int INVALID_JOIN_ON_EXPRESSION; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; - extern const int SYNTAX_ERROR; extern const int ACCESS_DENIED; extern const int PARAMETER_OUT_OF_BOUND; extern const int TOO_MANY_COLUMNS; @@ -1395,12 +1396,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ { if (!join_clause.hasASOF()) throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "JOIN {} no inequality in ASOF JOIN ON section.", - join_node.formatASTForErrorMessage()); - - if (table_join_clause.key_names_left.size() <= 1) - throw Exception(ErrorCodes::SYNTAX_ERROR, - "JOIN {} ASOF join needs at least one equi-join column", + "JOIN {} no inequality in ASOF JOIN ON section", join_node.formatASTForErrorMessage()); } @@ -1522,7 +1518,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ { const auto & join_clause = table_join->getOnlyClause(); - bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind); + bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any) + && (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind)); + auto has_non_const = [](const Block & block, const auto & keys) { @@ -1542,7 +1540,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ bool has_non_const_keys = has_non_const(left_plan.getCurrentDataStream().header, join_clause.key_names_left) && has_non_const(right_plan.getCurrentDataStream().header, join_clause.key_names_right); - if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys) + if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys) { auto * left_set = add_create_set(left_plan, join_clause.key_names_left, JoinTableSide::Left); auto * right_set = add_create_set(right_plan, join_clause.key_names_right, JoinTableSide::Right); diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 05420cc3301..d9360a58240 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -43,6 +43,7 @@ #include #include +#include #include namespace DB @@ -812,7 +813,7 @@ static std::shared_ptr tryCreateJoin(JoinAlgorithm algorithm, query_context->getServerSettings().max_entries_for_hash_table_stats, settings.max_size_to_preallocate_for_joins}; return std::make_shared( - query_context, table_join, 
query_context->getSettings().max_threads, right_table_expression_header, params); + query_context, table_join, query_context->getSettingsRef().max_threads, right_table_expression_header, params); } return std::make_shared(table_join, right_table_expression_header, query_context->getSettingsRef().join_any_take_last_row); diff --git a/src/Planner/PlannerSorting.cpp b/src/Planner/PlannerSorting.cpp index 611a26f78fa..1c6e820fed1 100644 --- a/src/Planner/PlannerSorting.cpp +++ b/src/Planner/PlannerSorting.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include diff --git a/src/Planner/PlannerWindowFunctions.cpp b/src/Planner/PlannerWindowFunctions.cpp index ce74d82c08d..225852de5a7 100644 --- a/src/Planner/PlannerWindowFunctions.cpp +++ b/src/Planner/PlannerWindowFunctions.cpp @@ -4,6 +4,8 @@ #include #include +#include + #include #include diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 18a6d297838..3c54c57a28c 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -33,6 +33,8 @@ #include #include +#include + #include #include #include diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index f2bc1f060d8..c89a70be541 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 5f6cf2f7230..4466be5b3a7 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -19,14 +19,6 @@ Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns checkNumRowsIsConsistent(); } -Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) - : columns(std::move(columns_)) - , num_rows(num_rows_) - , chunk_info(std::move(chunk_info_)) -{ - checkNumRowsIsConsistent(); -} - static Columns unmuteColumns(MutableColumns && mutable_columns) { Columns columns; @@ -43,17 +35,11 @@ Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_) checkNumRowsIsConsistent(); } -Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) - : columns(unmuteColumns(std::move(columns_))) - , num_rows(num_rows_) - , chunk_info(std::move(chunk_info_)) -{ - checkNumRowsIsConsistent(); -} - Chunk Chunk::clone() const { - return Chunk(getColumns(), getNumRows(), chunk_info); + auto tmp = Chunk(getColumns(), getNumRows()); + tmp.setChunkInfos(chunk_infos.clone()); + return tmp; } void Chunk::setColumns(Columns columns_, UInt64 num_rows_) diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 4f753798eaa..1348966c0d3 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -1,7 +1,9 @@ #pragma once +#include #include -#include + +#include namespace DB { @@ -9,11 +11,29 @@ namespace DB class ChunkInfo { public: - virtual ~ChunkInfo() = default; + using Ptr = std::shared_ptr; + ChunkInfo() = default; + ChunkInfo(const ChunkInfo&) = default; + ChunkInfo(ChunkInfo&&) = default; + + virtual Ptr clone() const = 0; + virtual ~ChunkInfo() = default; }; -using ChunkInfoPtr = std::shared_ptr; + +template +class ChunkInfoCloneable : public ChunkInfo +{ +public: + ChunkInfoCloneable() = default; + ChunkInfoCloneable(const ChunkInfoCloneable & other) = default; + + Ptr clone() const override + { + return std::static_pointer_cast(std::make_shared(*static_cast(this))); + } +}; /** * Chunk is a list of columns with the same length. 
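ChunkInfo gains a pure virtual clone(), and ChunkInfoCloneable supplies it once through CRTP so each concrete info type is deep-copied via its own copy constructor. A self-contained sketch of the mixin, with illustrative names:

#include <cstddef>
#include <memory>
#include <vector>

class Info
{
public:
    using Ptr = std::shared_ptr<Info>;
    virtual ~Info() = default;
    virtual Ptr clone() const = 0;
};

template <class Derived>
class InfoCloneable : public Info
{
public:
    Ptr clone() const override
    {
        /// Copy-construct the most derived type; the cast is safe because Derived inherits from this mixin.
        return std::make_shared<Derived>(*static_cast<const Derived *>(this));
    }
};

struct OffsetsInfo : public InfoCloneable<OffsetsInfo>
{
    std::vector<std::size_t> offsets;   /// deep-copied by the defaulted copy constructor
};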
@@ -32,26 +52,26 @@ using ChunkInfoPtr = std::shared_ptr; class Chunk { public: + using ChunkInfoCollection = CollectionOfDerivedItems; + Chunk() = default; Chunk(const Chunk & other) = delete; Chunk(Chunk && other) noexcept : columns(std::move(other.columns)) , num_rows(other.num_rows) - , chunk_info(std::move(other.chunk_info)) + , chunk_infos(std::move(other.chunk_infos)) { other.num_rows = 0; } Chunk(Columns columns_, UInt64 num_rows_); - Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk(MutableColumns columns_, UInt64 num_rows_); - Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk & operator=(const Chunk & other) = delete; Chunk & operator=(Chunk && other) noexcept { columns = std::move(other.columns); - chunk_info = std::move(other.chunk_info); + chunk_infos = std::move(other.chunk_infos); num_rows = other.num_rows; other.num_rows = 0; return *this; @@ -62,15 +82,15 @@ public: void swap(Chunk & other) noexcept { columns.swap(other.columns); - chunk_info.swap(other.chunk_info); std::swap(num_rows, other.num_rows); + chunk_infos.swap(other.chunk_infos); } void clear() { num_rows = 0; columns.clear(); - chunk_info.reset(); + chunk_infos.clear(); } const Columns & getColumns() const { return columns; } @@ -81,9 +101,9 @@ public: /** Get empty columns with the same types as in block. */ MutableColumns cloneEmptyColumns() const; - const ChunkInfoPtr & getChunkInfo() const { return chunk_info; } - bool hasChunkInfo() const { return chunk_info != nullptr; } - void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); } + ChunkInfoCollection & getChunkInfos() { return chunk_infos; } + const ChunkInfoCollection & getChunkInfos() const { return chunk_infos; } + void setChunkInfos(ChunkInfoCollection chunk_infos_) { chunk_infos = std::move(chunk_infos_); } UInt64 getNumRows() const { return num_rows; } UInt64 getNumColumns() const { return columns.size(); } @@ -107,7 +127,7 @@ public: private: Columns columns; UInt64 num_rows = 0; - ChunkInfoPtr chunk_info; + ChunkInfoCollection chunk_infos; void checkNumRowsIsConsistent(); }; @@ -117,11 +137,15 @@ using Chunks = std::vector; /// AsyncInsert needs two kinds of information: /// - offsets of different sub-chunks /// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`. -class AsyncInsertInfo : public ChunkInfo +class AsyncInsertInfo : public ChunkInfoCloneable { public: AsyncInsertInfo() = default; - explicit AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) : offsets(offsets_), tokens(tokens_) {} + AsyncInsertInfo(const AsyncInsertInfo & other) = default; + AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) + : offsets(offsets_) + , tokens(tokens_) + {} std::vector offsets; std::vector tokens; @@ -130,9 +154,11 @@ public: using AsyncInsertInfoPtr = std::shared_ptr; /// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults. 
-class ChunkMissingValues : public ChunkInfo +class ChunkMissingValues : public ChunkInfoCloneable { public: + ChunkMissingValues(const ChunkMissingValues & other) = default; + using RowsBitMask = std::vector; /// a bit per row for a column const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 49ec9999521..82cad471a29 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef NDEBUG #include diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index d27002197d2..d9fab88fe1f 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -147,13 +147,10 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto chunk_info = chunk.getChunkInfo()) + if (auto agg_info = chunk.getChunkInfos().get()) { - if (const auto * agg_info = typeid_cast(chunk_info.get())) - { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; - } + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; } return true; diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index cbf73c5cb07..25c15d40c9a 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -73,13 +73,10 @@ bool PullingPipelineExecutor::pull(Block & block) } block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto chunk_info = chunk.getChunkInfo()) + if (auto agg_info = chunk.getChunkInfos().get()) { - if (const auto * agg_info = typeid_cast(chunk_info.get())) - { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; - } + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; } return true; diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index fe82d1b1c53..06e8668cd7c 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Processors/Formats/Impl/FormRowInputFormat.cpp b/src/Processors/Formats/Impl/FormRowInputFormat.cpp index d3c6f3798cc..698efb896ea 100644 --- a/src/Processors/Formats/Impl/FormRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/FormRowInputFormat.cpp @@ -1,7 +1,9 @@ -#include +#include #include "Formats/EscapingRuleUtils.h" #include +#include + namespace DB { diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index a5d334f4f1d..9e499e2c400 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -179,7 +179,9 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) 
columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count); Chunks piece; - piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo()); + piece.emplace_back(std::move(columns), count); + piece.back().setChunkInfos(concatenated.getChunkInfos()); + writeRowGroup(std::move(piece)); } } diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h index 4c52300fbd1..698efecd4b2 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h @@ -74,7 +74,8 @@ void registerPrettyFormatWithNoEscapesAndMonoBlock(FormatFactory & factory, cons const Block & sample, const FormatSettings & format_settings) { - bool color = !no_escapes && format_settings.pretty.color.valueOr(format_settings.is_writing_to_terminal); + bool color = !no_escapes + && (format_settings.pretty.color == 1 || (format_settings.pretty.color == 2 && format_settings.is_writing_to_terminal)); return std::make_shared(buf, sample, format_settings, mono_block, color); }); if (!mono_block) diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 1493779ec2d..c23d717d52f 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Processors/Formats/InputFormatErrorsLogger.cpp b/src/Processors/Formats/InputFormatErrorsLogger.cpp index 814c4679cf9..b5aa936df07 100644 --- a/src/Processors/Formats/InputFormatErrorsLogger.cpp +++ b/src/Processors/Formats/InputFormatErrorsLogger.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 4136fc5a5f2..46be6e74693 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -8,8 +8,9 @@ namespace ErrorCodes } IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header) - : IProcessor({std::move(input_header)}, {std::move(output_header)}), - input(inputs.front()), output(outputs.front()) + : IProcessor({std::move(input_header)}, {std::move(output_header)}) + , input(inputs.front()) + , output(outputs.front()) { } diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index 466adf93538..86675bcb237 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -53,13 +53,11 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num if (!input.chunk.hasRows()) return; - const auto & info = input.chunk.getChunkInfo(); - if (!info) + if (input.chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm"); Int64 allocated_bytes = 0; - /// Will be set by AggregatingInOrderTransform during local aggregation; will be nullptr during merging on initiator. 
- if (const auto * arenas_info = typeid_cast(info.get())) + if (auto arenas_info = input.chunk.getChunkInfos().get()) allocated_bytes = arenas_info->allocated_bytes; states[source_num] = State{input.chunk, description, allocated_bytes}; @@ -136,7 +134,7 @@ Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge() info->chunk_num = chunk_num++; Chunk chunk; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); return chunk; } @@ -163,7 +161,7 @@ void FinishAggregatingInOrderAlgorithm::addToAggregation() chunks.emplace_back(std::move(new_columns), current_rows); } - chunks.back().setChunkInfo(std::make_shared()); + chunks.back().getChunkInfos().add(std::make_shared()); states[i].current_row = states[i].to_row; /// We assume that sizes in bytes of rows are almost the same. diff --git a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h index bcf4e759024..e4f22deec8d 100644 --- a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h +++ b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h @@ -6,18 +6,22 @@ namespace DB { /// To carry part level if chunk is produced by a merge tree source -class MergeTreePartLevelInfo : public ChunkInfo +class MergeTreePartLevelInfo : public ChunkInfoCloneable { public: MergeTreePartLevelInfo() = delete; - explicit MergeTreePartLevelInfo(ssize_t part_level) : origin_merge_tree_part_level(part_level) { } + explicit MergeTreePartLevelInfo(ssize_t part_level) + : origin_merge_tree_part_level(part_level) + { } + MergeTreePartLevelInfo(const MergeTreePartLevelInfo & other) = default; + size_t origin_merge_tree_part_level = 0; }; inline size_t getPartLevelFromChunk(const Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - if (const auto * part_level_info = typeid_cast(info.get())) + const auto part_level_info = chunk.getChunkInfos().get(); + if (part_level_info) return part_level_info->origin_merge_tree_part_level; return 0; } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index 7b2c7d82a01..cd347d371d9 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false) { - chunk->setChunkInfo(std::make_shared(std::move(chunk->replace_final_selection))); + chunk->getChunkInfos().add(std::make_shared(std::move(chunk->replace_final_selection))); return IMergingAlgorithm::Status(std::move(*chunk), finished); } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index a3ccccf0845..2f23f2a5c4d 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace Poco { @@ -14,11 +15,13 @@ namespace DB /** Use in skipping final to keep list of indices of selected row after merging final */ -struct ChunkSelectFinalIndices : public ChunkInfo +struct ChunkSelectFinalIndices : public ChunkInfoCloneable { + explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); + ChunkSelectFinalIndices(const ChunkSelectFinalIndices & other) = default; + const ColumnPtr column_holder; const ColumnUInt64 * select_final_indices = nullptr; - explicit 
ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); }; /** Merges several sorted inputs into one. diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index fbb47969b2f..b1b0182a113 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -157,7 +157,7 @@ IProcessor::Status IMergingTransformBase::prepare() bool is_port_full = !output.canPush(); /// Push if has data. - if ((state.output_chunk || state.output_chunk.hasChunkInfo()) && !is_port_full) + if ((state.output_chunk || !state.output_chunk.getChunkInfos().empty()) && !is_port_full) output.push(std::move(state.output_chunk)); if (!is_initialized) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index c218f622870..be629271736 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -129,7 +129,7 @@ public: IMergingAlgorithm::Status status = algorithm.merge(); - if ((status.chunk && status.chunk.hasRows()) || status.chunk.hasChunkInfo()) + if ((status.chunk && status.chunk.hasRows()) || !status.chunk.getChunkInfos().empty()) { // std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl; state.output_chunk = std::move(status.chunk); diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h index 70ae4c6156e..27171511703 100644 --- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h +++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h @@ -1,4 +1,6 @@ #pragma once + +#include #include #include diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index 1f4f271fa6e..14f58585d3d 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 1badd315200..3cb7f8a5d83 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -8,6 +9,7 @@ #include #include #include + namespace DB { diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index e1ef38022b5..28eb4da2e17 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -22,9 +22,10 @@ #include #include #include -#include "Storages/KeyDescription.h" +#include #include #include +#include #include diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 70327bc95b4..9e2c2dcc96d 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -515,10 +516,11 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( if (!candidates.minmax_projection) { - auto it = std::find_if(agg_projections.begin(), agg_projections.end(), 
[&](const auto * projection) - { - return projection->name == context->getSettings().preferred_optimize_projection_name.value; - }); + auto it = std::find_if( + agg_projections.begin(), + agg_projections.end(), + [&](const auto * projection) + { return projection->name == context->getSettingsRef().preferred_optimize_projection_name.value; }); if (it != agg_projections.end()) { diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index 0af3869ccf1..291800c2c86 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -7,9 +7,11 @@ #include #include #include +#include #include #include #include + #include namespace DB::QueryPlanOptimizations @@ -110,10 +112,10 @@ std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod return {}; ContextPtr context = reading->getContext(); - auto it = std::find_if(normal_projections.begin(), normal_projections.end(), [&](const auto * projection) - { - return projection->name == context->getSettings().preferred_optimize_projection_name.value; - }); + auto it = std::find_if( + normal_projections.begin(), + normal_projections.end(), + [&](const auto * projection) { return projection->name == context->getSettingsRef().preferred_optimize_projection_name.value; }); if (it != normal_projections.end()) { diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index af1578d6af8..4d60a1135a6 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index dc6d96a721e..6e86d14f5cc 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 84c2515e8ca..e27515a62a4 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -411,8 +412,8 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); const auto & shard = cluster->getShardsInfo().at(0); - size_t all_replicas_count = current_settings.max_parallel_replicas; - if (all_replicas_count > shard.getAllNodeCount()) + size_t max_replicas_to_use = current_settings.max_parallel_replicas; + if (max_replicas_to_use > shard.getAllNodeCount()) { LOG_INFO( getLogger("ReadFromParallelRemoteReplicasStep"), @@ -420,14 +421,14 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder "Will use the latter number to execute the query.", current_settings.max_parallel_replicas, shard.getAllNodeCount()); - all_replicas_count = shard.getAllNodeCount(); + max_replicas_to_use = shard.getAllNodeCount(); } std::vector shuffled_pool; - if (all_replicas_count < shard.getAllNodeCount()) + if 
(max_replicas_to_use < shard.getAllNodeCount()) { shuffled_pool = shard.pool->getShuffledPools(current_settings); - shuffled_pool.resize(all_replicas_count); + shuffled_pool.resize(max_replicas_to_use); } else { @@ -437,11 +438,10 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder shuffled_pool = shard.pool->getShuffledPools(current_settings, priority_func); } - for (size_t i=0; i < all_replicas_count; ++i) + for (size_t i=0; i < max_replicas_to_use; ++i) { IConnections::ReplicaInfo replica_info { - .all_replicas_count = all_replicas_count, /// we should use this number specifically because efficiency of data distribution by consistent hash depends on it. .number_of_current_replica = i, }; diff --git a/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp b/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp index 4a257bba922..51117ad3c9b 100644 --- a/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp +++ b/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp @@ -1,5 +1,6 @@ #include +#include #include #include diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index eb974259c5e..b070bbe739b 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -35,18 +36,33 @@ inline void iotaWithStepOptimized(T * begin, size_t count, T first_value, T step iotaWithStep(begin, count, first_value, step); } +/// The range is defined as [start, end) +UInt64 itemCountInRange(UInt64 start, UInt64 end, UInt64 step) +{ + const auto range_count = end - start; + if (step == 1) + return range_count; + + return (range_count - 1) / step + 1; +} + class NumbersSource : public ISource { public: - NumbersSource(UInt64 block_size_, UInt64 offset_, std::optional limit_, UInt64 chunk_step_, const std::string & column_name, UInt64 step_) + NumbersSource( + UInt64 block_size_, + UInt64 offset_, + std::optional end_, + const std::string & column_name, + UInt64 step_in_chunk_, + UInt64 step_between_chunks_) : ISource(createHeader(column_name)) , block_size(block_size_) , next(offset_) - , chunk_step(chunk_step_) - , step(step_) + , end(end_) + , step_in_chunk(step_in_chunk_) + , step_between_chunks(step_between_chunks_) { - if (limit_.has_value()) - end = limit_.value() + offset_; } String getName() const override { return "Numbers"; } @@ -63,7 +79,10 @@ protected: { if (end.value() <= next) return {}; - real_block_size = std::min(block_size, end.value() - next); + + auto max_items_to_generate = itemCountInRange(next, *end, step_in_chunk); + + real_block_size = std::min(block_size, max_items_to_generate); } auto column = ColumnUInt64::create(real_block_size); ColumnUInt64::Container & vec = column->getData(); @@ -73,21 +92,20 @@ protected: UInt64 * current_end = &vec[real_block_size]; - iotaWithStepOptimized(pos, static_cast(current_end - pos), curr, step); + iotaWithStepOptimized(pos, static_cast(current_end - pos), curr, step_in_chunk); - next += chunk_step; + next += step_between_chunks; progress(column->size(), column->byteSize()); - return {Columns{std::move(column)}, real_block_size}; } private: UInt64 block_size; UInt64 next; - UInt64 chunk_step; std::optional end; /// not included - UInt64 step; + UInt64 step_in_chunk; + UInt64 step_between_chunks; }; struct RangeWithStep @@ -548,20 +566,39 @@ Pipe ReadFromSystemNumbersStep::makePipe() return pipe; } + const auto end = 
std::invoke( + [&]() -> std::optional + { + if (numbers_storage.limit.has_value()) + return *(numbers_storage.limit) + numbers_storage.offset; + return {}; + }); + /// Fall back to NumbersSource + /// Range in a single block + const auto block_range = max_block_size * numbers_storage.step; + /// Step between chunks in a single source. + /// It is bigger than block_range in case of multiple threads, because we have to account for other sources as well. + const auto step_between_chunks = num_streams * block_range; for (size_t i = 0; i < num_streams; ++i) { + const auto source_offset = i * block_range; + if (numbers_storage.limit.has_value() && *numbers_storage.limit < source_offset) + break; + + const auto source_start = numbers_storage.offset + source_offset; + auto source = std::make_shared( max_block_size, - numbers_storage.offset + i * max_block_size * numbers_storage.step, - numbers_storage.limit, - num_streams * max_block_size * numbers_storage.step, + source_start, + end, numbers_storage.column_name, - numbers_storage.step); + numbers_storage.step, + step_between_chunks); if (numbers_storage.limit && i == 0) { - auto rows_appr = (*numbers_storage.limit - 1) / numbers_storage.step + 1; + auto rows_appr = itemCountInRange(numbers_storage.offset, *numbers_storage.limit, numbers_storage.step); if (limit > 0 && limit < rows_appr) rows_appr = query_info_limit; source->addTotalRowsApprox(rows_appr); @@ -570,19 +607,6 @@ Pipe ReadFromSystemNumbersStep::makePipe() pipe.addSource(std::move(source)); } - if (numbers_storage.limit) - { - size_t i = 0; - auto storage_limit = (*numbers_storage.limit - 1) / numbers_storage.step + 1; - /// This formula is how to split 'limit' elements to 'num_streams' chunks almost uniformly. - pipe.addSimpleTransform( - [&](const Block & header) - { - ++i; - return std::make_shared(header, storage_limit * i / num_streams - storage_limit * (i - 1) / num_streams, 0); - }); - } - return pipe; } diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 1c40f84d23d..48fad9f5fdb 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -10,10 +9,13 @@ #include #include #include +#include #include #include +#include + namespace CurrentMetrics { extern const Metric TemporaryFilesForSort; diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index ac5e144bf4a..10384f6296a 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Processors/Sinks/RemoteSink.h b/src/Processors/Sinks/RemoteSink.h index 30cf958c072..c05cc1defcb 100644 --- a/src/Processors/Sinks/RemoteSink.h +++ b/src/Processors/Sinks/RemoteSink.h @@ -20,7 +20,7 @@ public: } String getName() const override { return "RemoteSink"; } - void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } + void consume (Chunk & chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.getColumns())); } void onFinish() override { RemoteInserter::onFinish(); } }; diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 5f9f9f9b1a1..36bb70f493f 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -15,9 +15,8 @@ void 
SinkToStorage::onConsume(Chunk chunk) */ Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); - consume(chunk.clone()); - if (!lastBlockIsDuplicate()) - cur_chunk = std::move(chunk); + consume(chunk); + cur_chunk = std::move(chunk); } SinkToStorage::GenerateResult SinkToStorage::onGenerate() diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index 023bbd8b094..c728fa87b1e 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -18,8 +18,7 @@ public: void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } protected: - virtual void consume(Chunk chunk) = 0; - virtual bool lastBlockIsDuplicate() const { return false; } + virtual void consume(Chunk & chunk) = 0; private: std::vector table_locks; @@ -38,7 +37,7 @@ class NullSinkToStorage : public SinkToStorage public: using SinkToStorage::SinkToStorage; std::string getName() const override { return "NullSinkToStorage"; } - void consume(Chunk) override {} + void consume(Chunk &) override {} }; using SinkPtr = std::shared_ptr; diff --git a/src/Processors/Sources/BlocksSource.h b/src/Processors/Sources/BlocksSource.h index ec0dc9609f1..7ac460c14e2 100644 --- a/src/Processors/Sources/BlocksSource.h +++ b/src/Processors/Sources/BlocksSource.h @@ -43,7 +43,10 @@ protected: info->bucket_num = res.info.bucket_num; info->is_overflows = res.info.is_overflows; - return Chunk(res.getColumns(), res.rows(), std::move(info)); + auto chunk = Chunk(res.getColumns(), res.rows()); + chunk.getChunkInfos().add(std::move(info)); + + return chunk; } private: diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp index 985a82a7b17..5d533a7747e 100644 --- a/src/Processors/Sources/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -2,7 +2,9 @@ #if USE_MYSQL #include + #include +#include #include #include #include diff --git a/src/Processors/Sources/MySQLSource.h b/src/Processors/Sources/MySQLSource.h index 4ae5af22dab..b99e48a7759 100644 --- a/src/Processors/Sources/MySQLSource.h +++ b/src/Processors/Sources/MySQLSource.h @@ -6,11 +6,12 @@ #include #include #include -#include namespace DB { +struct Settings; + struct StreamSettings { /// Check if setting is enabled, otherwise use common `max_block_size` setting. 
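To illustrate the parallel source layout introduced above in ReadFromSystemNumbersStep.cpp: each of the num_streams sources starts at offset + i * block_range and then advances by step_between_chunks, so the streams interleave without overlapping, and itemCountInRange() bounds the last chunk against the end of the range. The standalone sketch below only mirrors that arithmetic; the main() driver and the concrete constants are illustrative and not part of the patch.

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    /// Mirrors itemCountInRange from ReadFromSystemNumbersStep.cpp: items generated in [start, end) with the given step.
    static uint64_t itemCountInRange(uint64_t start, uint64_t end, uint64_t step)
    {
        const auto range_count = end - start;
        if (step == 1)
            return range_count;
        return (range_count - 1) / step + 1;
    }

    int main()
    {
        /// Illustrative values: two parallel sources generating values in [0, 10) with step 3, blocks of 2 rows.
        const uint64_t offset = 0, step = 3, max_block_size = 2, num_streams = 2, limit = 10;

        const uint64_t block_range = max_block_size * step;             /// values covered by one chunk
        const uint64_t step_between_chunks = num_streams * block_range; /// gap between consecutive chunks of one source
        const uint64_t end = offset + limit;                            /// exclusive upper bound

        for (uint64_t i = 0; i < num_streams; ++i)
        {
            std::cout << "source " << i << ":";
            for (uint64_t next = offset + i * block_range; next < end; next += step_between_chunks)
            {
                const uint64_t rows = std::min<uint64_t>(max_block_size, itemCountInRange(next, end, step));
                for (uint64_t r = 0, v = next; r < rows; ++r, v += step)
                    std::cout << ' ' << v;
            }
            std::cout << '\n';   /// prints "source 0: 0 3" and "source 1: 6 9"
        }
    }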
diff --git a/src/Processors/Sources/RecursiveCTESource.cpp b/src/Processors/Sources/RecursiveCTESource.cpp index 221198c622a..40e72e9cbb1 100644 --- a/src/Processors/Sources/RecursiveCTESource.cpp +++ b/src/Processors/Sources/RecursiveCTESource.cpp @@ -17,6 +17,8 @@ #include #include +#include + namespace DB { diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 3d7dd3f76b8..1578bd389c9 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -176,7 +176,7 @@ std::optional RemoteSource::tryGenerate() auto info = std::make_shared(); info->bucket_num = block.info.bucket_num; info->is_overflows = block.info.is_overflows; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); } return chunk; diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index 00f40a34361..9abe0504d10 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -5,7 +5,9 @@ namespace DB { -SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {} +SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) +{ +} SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows()) { @@ -20,7 +22,7 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp auto info = std::make_shared(); info->bucket_num = data.info.bucket_num; info->is_overflows = data.info.is_overflows; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); } } diff --git a/src/Processors/TTL/TTLAggregationAlgorithm.cpp b/src/Processors/TTL/TTLAggregationAlgorithm.cpp index 45e8a96412e..2d7a37d0abe 100644 --- a/src/Processors/TTL/TTLAggregationAlgorithm.cpp +++ b/src/Processors/TTL/TTLAggregationAlgorithm.cpp @@ -1,5 +1,6 @@ -#include +#include #include +#include namespace DB { diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 9ffe15d0f85..45b0960ec8f 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -332,7 +332,7 @@ void AggregatingInOrderTransform::generate() variants.aggregates_pool = variants.aggregates_pools.at(0).get(); /// Pass info about used memory by aggregate functions further. 
- to_push_chunk.setChunkInfo(std::make_shared(cur_block_bytes)); + to_push_chunk.getChunkInfos().add(std::make_shared(cur_block_bytes)); cur_block_bytes = 0; cur_block_size = 0; @@ -351,11 +351,12 @@ FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, Aggregati void FinalizeAggregatedTransform::transform(Chunk & chunk) { if (params->final) - finalizeChunk(chunk, aggregates_mask); - else if (!chunk.getChunkInfo()) { - auto info = std::make_shared(); - chunk.setChunkInfo(std::move(info)); + finalizeChunk(chunk, aggregates_mask); + } + else if (!chunk.getChunkInfos().get()) + { + chunk.getChunkInfos().add(std::make_shared()); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 5d50e97f552..41a0d7fc7f1 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { @@ -12,10 +13,12 @@ namespace DB struct InputOrderInfo; using InputOrderInfoPtr = std::shared_ptr; -struct ChunkInfoWithAllocatedBytes : public ChunkInfo +struct ChunkInfoWithAllocatedBytes : public ChunkInfoCloneable { + ChunkInfoWithAllocatedBytes(const ChunkInfoWithAllocatedBytes & other) = default; explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_) : allocated_bytes(allocated_bytes_) {} + Int64 allocated_bytes; }; diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index cdbe194cfac..fda03174c53 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -35,7 +35,7 @@ Chunk convertToChunk(const Block & block) UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); return chunk; } @@ -44,15 +44,11 @@ namespace { const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk."); - - const auto * agg_info = typeid_cast(info.get()); + auto agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo."); - return agg_info; + return agg_info.get(); } /// Reads chunks from file in native format. Provide chunks with aggregation info. 
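The recurring pattern in these changes (visible just above in convertToChunk and getInfoFromChunk) is the replacement of the single chunk_info pointer by a typed collection: an info class derives from ChunkInfoCloneable, is attached with chunk.getChunkInfos().add(...) and read back with chunk.getChunkInfos().get<T>(). A minimal sketch of that pattern, assuming only the Chunk.h interfaces added in this diff; ExampleChunkInfo, tagChunk and readMarker are hypothetical names used for illustration.

    #include <Processors/Chunk.h>  /// ChunkInfoCloneable and Chunk::ChunkInfoCollection from this diff

    namespace DB
    {

    /// Hypothetical payload used only to illustrate the new API.
    class ExampleChunkInfo : public ChunkInfoCloneable<ExampleChunkInfo>
    {
    public:
        ExampleChunkInfo() = default;
        ExampleChunkInfo(const ExampleChunkInfo & other) = default;  /// needed so clone() can copy it
        size_t marker = 0;
    };

    void tagChunk(Chunk & chunk, size_t marker)
    {
        auto info = std::make_shared<ExampleChunkInfo>();
        info->marker = marker;
        chunk.getChunkInfos().add(std::move(info));   /// was: chunk.setChunkInfo(std::move(info))
    }

    size_t readMarker(const Chunk & chunk)
    {
        /// was: typeid_cast on the result of chunk.getChunkInfo()
        if (auto info = chunk.getChunkInfos().get<ExampleChunkInfo>())
            return info->marker;
        return 0;
    }

    }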
@@ -210,11 +206,7 @@ private: void process(Chunk && chunk) { - if (!chunk.hasChunkInfo()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with chunk info in {}", getName()); - - const auto & info = chunk.getChunkInfo(); - const auto * chunks_to_merge = typeid_cast(info.get()); + auto chunks_to_merge = chunk.getChunkInfos().get(); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName()); diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index e167acde067..95983c39d1e 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -19,7 +20,7 @@ namespace CurrentMetrics namespace DB { -class AggregatedChunkInfo : public ChunkInfo +class AggregatedChunkInfo : public ChunkInfoCloneable { public: bool is_overflows = false; diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 965a084bb13..49a6581e685 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -27,18 +27,12 @@ public: } ExceptionKeepingTransform::work(); - if (finish_chunk) - { - data.chunk = std::move(finish_chunk); - ready_output = true; - } } protected: void onConsume(Chunk chunk) override { - if (auto res_chunk = DB::Squashing::squash(std::move(chunk))) - cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows()); + cur_chunk = Squashing::squash(std::move(chunk)); } GenerateResult onGenerate() override @@ -48,16 +42,10 @@ protected: res.is_done = true; return res; } - void onFinish() override - { - auto chunk = DB::Squashing::squash({}); - finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); - } private: Squashing squashing; Chunk cur_chunk; - Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 3dfb9fe178f..2c6b3bd8638 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,6 +1,7 @@ - -#include #include + +#include +#include #include #include diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp new file mode 100644 index 00000000000..841090f029e --- /dev/null +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -0,0 +1,238 @@ +#include + +#include + +#include +#include +#include + + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +void RestoreChunkInfosTransform::transform(Chunk & chunk) +{ + chunk.getChunkInfos().appendIfUniq(chunk_infos.clone()); +} + +namespace DeduplicationToken +{ + +String TokenInfo::getToken() const +{ + if (!isDefined()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is not defined, stage {}, token {}", stage, debugToken()); + + return getTokenImpl(); +} + +String TokenInfo::getTokenImpl() const +{ + String result; + result.reserve(getTotalSize()); + + for (const auto & part : parts) + { + if (!result.empty()) + result.append(":"); + result.append(part); + } + + return result; +} + +String TokenInfo::debugToken() const +{ + return getTokenImpl(); +} + +void TokenInfo::addChunkHash(String part) +{ + if (stage == UNDEFINED && empty()) + stage = DEFINE_SOURCE_WITH_HASHES; + 
+ if (stage != DEFINE_SOURCE_WITH_HASHES) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(std::move(part)); +} + +void TokenInfo::finishChunkHashes() +{ + if (stage == UNDEFINED && empty()) + stage = DEFINE_SOURCE_WITH_HASHES; + + if (stage != DEFINE_SOURCE_WITH_HASHES) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + stage = DEFINED; +} + +void TokenInfo::setUserToken(const String & token) +{ + if (stage == UNDEFINED && empty()) + stage = DEFINE_SOURCE_USER_TOKEN; + + if (stage != DEFINE_SOURCE_USER_TOKEN) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(fmt::format("user-token-{}", token)); +} + +void TokenInfo::setSourceWithUserToken(size_t block_number) +{ + if (stage != DEFINE_SOURCE_USER_TOKEN) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(fmt::format("source-number-{}", block_number)); + + stage = DEFINED; +} + +void TokenInfo::setViewID(const String & id) +{ + if (stage == DEFINED) + stage = DEFINE_VIEW; + + if (stage != DEFINE_VIEW) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(fmt::format("view-id-{}", id)); +} + +void TokenInfo::setViewBlockNumber(size_t block_number) +{ + if (stage != DEFINE_VIEW) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(fmt::format("view-block-{}", block_number)); + + stage = DEFINED; +} + +void TokenInfo::reset() +{ + stage = UNDEFINED; + parts.clear(); +} + +void TokenInfo::addTokenPart(String part) +{ + parts.push_back(std::move(part)); +} + +size_t TokenInfo::getTotalSize() const +{ + if (parts.empty()) + return 0; + + size_t size = 0; + for (const auto & part : parts) + size += part.size(); + + // we reserve more size here to be able to add a delimiter between parts.
+ return size + parts.size() - 1; +} + +#ifdef ABORT_ON_LOGICAL_ERROR +void CheckTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + + if (!token_info) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); + } + + LOG_TEST(log, "debug: {}, token: {}, columns {} rows {}", debug, token_info->debugToken(), chunk.getNumColumns(), chunk.getNumRows()); +} +#endif + +String DefineSourceWithChunkHashTransform::getChunkHash(const Chunk & chunk) +{ + SipHash hash; + for (const auto & colunm : chunk.getColumns()) + colunm->updateHashFast(hash); + + const auto hash_value = hash.get128(); + return toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); +} + + +void DefineSourceWithChunkHashTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in DefineSourceWithChunkHashesTransform"); + + if (token_info->isDefined()) + return; + + token_info->addChunkHash(getChunkHash(chunk)); + token_info->finishChunkHashes(); +} + +void SetUserTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetUserTokenTransform"); + token_info->setUserToken(user_token); +} + +void SetSourceBlockNumberTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); + token_info->setSourceWithUserToken(block_number++); +} + +void SetViewIDTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetViewIDTransform"); + token_info->setViewID(view_id); +} + +void SetViewBlockNumberTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); + token_info->setViewBlockNumber(block_number++); +} + +void ResetTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in ResetTokenTransform"); + + token_info->reset(); +} + +} +} diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h new file mode 100644 index 00000000000..d6aff9e1370 --- /dev/null +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -0,0 +1,237 @@ +#pragma once + +#include +#include + +#include +#include "Common/Logger.h" + + +namespace DB +{ + class RestoreChunkInfosTransform : public ISimpleTransform + { + public: + RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , chunk_infos(std::move(chunk_infos_)) + {} + + String getName() const override { return "RestoreChunkInfosTransform"; } + + void transform(Chunk & chunk) override; + + private: + Chunk::ChunkInfoCollection chunk_infos; + }; + + +namespace DeduplicationToken +{ + class TokenInfo : public ChunkInfoCloneable + { + 
public: + TokenInfo() = default; + TokenInfo(const TokenInfo & other) = default; + + String getToken() const; + String debugToken() const; + + bool empty() const { return parts.empty(); } + + bool isDefined() const { return stage == DEFINED; } + + void addChunkHash(String part); + void finishChunkHashes(); + + void setUserToken(const String & token); + void setSourceWithUserToken(size_t block_number); + + void setViewID(const String & id); + void setViewBlockNumber(size_t block_number); + + void reset(); + + private: + String getTokenImpl() const; + + void addTokenPart(String part); + size_t getTotalSize() const; + + /* Token has to be prepared in a particular order. + * BuildingStage ensures that the token is expanded in the following order. + * First, the token is expanded with information about the source. + * This can be done in two ways: add several hash sums from the source chunks, or provide a user-defined deduplication token and its sequential block number. + * + * transition // method + * UNDEFINED -> DEFINE_SOURCE_WITH_HASHES // addChunkHash + * DEFINE_SOURCE_WITH_HASHES -> DEFINE_SOURCE_WITH_HASHES // addChunkHash + * DEFINE_SOURCE_WITH_HASHES -> DEFINED // finishChunkHashes + * + * transition // method + * UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken + * DEFINE_SOURCE_USER_TOKEN -> DEFINED // setSourceWithUserToken + * + * After the token is defined, it can be extended with a view id and a view block number; it has to be expanded with view details when the data passes through one or several views. + * + * transition // method + * DEFINED -> DEFINE_VIEW // setViewID + * DEFINE_VIEW -> DEFINED // setViewBlockNumber + */ + + enum BuildingStage + { + UNDEFINED, + DEFINE_SOURCE_WITH_HASHES, + DEFINE_SOURCE_USER_TOKEN, + DEFINE_VIEW, + DEFINED, + }; + + BuildingStage stage = UNDEFINED; + std::vector parts; + }; + + +#ifdef ABORT_ON_LOGICAL_ERROR + /// use this class only with debug builds in CI for introspection + class CheckTokenTransform : public ISimpleTransform + { + public: + CheckTokenTransform(String debug_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , debug(std::move(debug_)) + { + } + + String getName() const override { return "DeduplicationToken::CheckTokenTransform"; } + + void transform(Chunk & chunk) override; + + private: + String debug; + LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform"); + }; +#endif + + + class AddTokenInfoTransform : public ISimpleTransform + { + public: + explicit AddTokenInfoTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::AddTokenInfoTransform"; } + + void transform(Chunk & chunk) override + { + chunk.getChunkInfos().add(std::make_shared()); + } + }; + + + class DefineSourceWithChunkHashTransform : public ISimpleTransform + { + public: + explicit DefineSourceWithChunkHashTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; } + + // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with hashes from the parts.
+ // But if there is some table with different engine, we still need to define the source of the data in deduplication token + // We use that transform to define the source as a hash of entire block in deduplication token + void transform(Chunk & chunk) override; + + static String getChunkHash(const Chunk & chunk); + }; + + class ResetTokenTransform : public ISimpleTransform + { + public: + explicit ResetTokenTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::ResetTokenTransform"; } + + void transform(Chunk & chunk) override; + }; + + + class SetUserTokenTransform : public ISimpleTransform + { + public: + SetUserTokenTransform(String user_token_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , user_token(std::move(user_token_)) + { + } + + String getName() const override { return "DeduplicationToken::SetUserTokenTransform"; } + + void transform(Chunk & chunk) override; + + private: + String user_token; + }; + + + class SetSourceBlockNumberTransform : public ISimpleTransform + { + public: + explicit SetSourceBlockNumberTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::SetSourceBlockNumberTransform"; } + + void transform(Chunk & chunk) override; + + private: + size_t block_number = 0; + }; + + + class SetViewIDTransform : public ISimpleTransform + { + public: + SetViewIDTransform(String view_id_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , view_id(std::move(view_id_)) + { + } + + String getName() const override { return "DeduplicationToken::SetViewIDTransform"; } + + void transform(Chunk & chunk) override; + + private: + String view_id; + }; + + + class SetViewBlockNumberTransform : public ISimpleTransform + { + public: + explicit SetViewBlockNumberTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::SetViewBlockNumberTransform"; } + + void transform(Chunk & chunk) override; + + private: + size_t block_number = 0; + }; + +} +} diff --git a/src/Processors/Transforms/DistinctSortedChunkTransform.h b/src/Processors/Transforms/DistinctSortedChunkTransform.h index 188e3d5c4c7..cdb20410a5b 100644 --- a/src/Processors/Transforms/DistinctSortedChunkTransform.h +++ b/src/Processors/Transforms/DistinctSortedChunkTransform.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Processors/Transforms/DistinctSortedTransform.h b/src/Processors/Transforms/DistinctSortedTransform.h index 86c5c968e4b..52819f1afd7 100644 --- a/src/Processors/Transforms/DistinctSortedTransform.h +++ b/src/Processors/Transforms/DistinctSortedTransform.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 2fbd2c21b8d..04fabc9a3c6 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -1,5 +1,7 @@ #include #include + + namespace DB { diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 3e2a9462e54..ca204bcb482 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -365,10 +365,9 @@ IProcessor::Status 
DelayedJoinedBlocksWorkerTransform::prepare() return Status::Finished; } - if (!data.chunk.hasChunkInfo()) + task = data.chunk.getChunkInfos().get(); + if (!task) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info"); - - task = std::dynamic_pointer_cast(data.chunk.getChunkInfo()); } else { @@ -479,7 +478,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (output.isFinished()) continue; Chunk chunk; - chunk.setChunkInfo(std::make_shared()); + chunk.getChunkInfos().add(std::make_shared()); output.push(std::move(chunk)); output.finish(); } @@ -496,7 +495,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() { Chunk chunk; auto task = std::make_shared(delayed_blocks, left_delayed_stream_finished_counter); - chunk.setChunkInfo(task); + chunk.getChunkInfos().add(std::move(task)); output.push(std::move(chunk)); } delayed_blocks = nullptr; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index a308af03662..5f6d9d6fff2 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ -1,6 +1,7 @@ #pragma once #include - +#include +#include namespace DB { @@ -111,11 +112,12 @@ private: }; -class DelayedBlocksTask : public ChunkInfo +class DelayedBlocksTask : public ChunkInfoCloneable { public: DelayedBlocksTask() = default; + DelayedBlocksTask(const DelayedBlocksTask & other) = default; explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_) : delayed_blocks(std::move(delayed_blocks_)) , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_) diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 1eaa5458d37..9ae80e21a68 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,6 +1,7 @@ #include #include + namespace DB { diff --git a/src/Processors/Transforms/MemoryBoundMerging.h b/src/Processors/Transforms/MemoryBoundMerging.h index 607087fb39c..d7bc320173b 100644 --- a/src/Processors/Transforms/MemoryBoundMerging.h +++ b/src/Processors/Transforms/MemoryBoundMerging.h @@ -150,11 +150,7 @@ private: if (!chunk.hasRows()) return; - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform."); - - const auto * agg_info = typeid_cast(info.get()); + const auto & agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception( ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedForMemoryBoundMergingTransform."); diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index fb3b2faa9c5..e96a75d277b 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -34,13 +34,20 @@ namespace ErrorCodes namespace { -FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns) +FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns, JoinStrictness strictness) { SortDescription desc; desc.reserve(columns.size()); for (const auto & name : columns) desc.emplace_back(name); - return std::make_unique(block, desc); + return std::make_unique(block, desc, strictness == JoinStrictness::Asof); +} 
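For the ASOF support added to MergeJoinTransform below, the last sort column is compared separately (compareAsofCursors / JoinKeyRow::asofMatch) and the three-way comparison result is mapped onto the configured inequality. A small standalone sketch of just that mapping; the enum is a local mirror of ASOFJoinInequality and asofCmpMatches is an illustrative helper, not code from the patch.

    #include <cassert>

    /// Local mirror of ASOFJoinInequality, only for this sketch.
    enum class ASOFJoinInequality { None, Less, Greater, LessOrEquals, GreaterOrEquals };

    /// cmp is the three-way comparison of the cursor's ASOF value against the stored key row
    /// (negative: cursor < stored, zero: equal, positive: cursor > stored), as in JoinKeyRow::asofMatch.
    bool asofCmpMatches(int cmp, ASOFJoinInequality inequality)
    {
        return (inequality == ASOFJoinInequality::Less && cmp < 0)
            || (inequality == ASOFJoinInequality::LessOrEquals && cmp <= 0)
            || (inequality == ASOFJoinInequality::Greater && cmp > 0)
            || (inequality == ASOFJoinInequality::GreaterOrEquals && cmp >= 0);
    }

    int main()
    {
        /// stored value 10, cursor value 7  =>  cmp < 0: only Less / LessOrEquals match
        assert(asofCmpMatches(-1, ASOFJoinInequality::Less));
        assert(!asofCmpMatches(-1, ASOFJoinInequality::Greater));
        /// equal values match only the *OrEquals variants
        assert(asofCmpMatches(0, ASOFJoinInequality::GreaterOrEquals));
        assert(!asofCmpMatches(0, ASOFJoinInequality::Less));
    }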
+ +bool ALWAYS_INLINE isNullAt(const IColumn & column, size_t row) +{ + if (const auto * nullable_column = checkAndGetColumn(&column)) + return nullable_column->isNullAt(row); + return false; } template @@ -54,7 +61,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, if (left_nullable && right_nullable) { int res = left_nullable->compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint); - if (res) + if (res != 0) return res; /// NULL != NULL case @@ -90,9 +97,10 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos, const SortCursorImpl & rhs, size_t rpos, + size_t key_length, int null_direction_hint) { - for (size_t i = 0; i < lhs.sort_columns_size; ++i) + for (size_t i = 0; i < key_length; ++i) { /// TODO(@vdimir): use nullableCompareAt only if there's nullable columns int cmp = nullableCompareAt(*lhs.sort_columns[i], *rhs.sort_columns[i], lpos, rpos, null_direction_hint); @@ -104,13 +112,18 @@ int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos, int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, const SortCursorImpl & rhs, int null_direction_hint) { - return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), null_direction_hint); + return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint); +} + +int compareAsofCursors(const FullMergeJoinCursor & lhs, const FullMergeJoinCursor & rhs, int null_direction_hint) +{ + return nullableCompareAt(*lhs.getAsofColumn(), *rhs.getAsofColumn(), lhs->getRow(), rhs->getRow(), null_direction_hint); } bool ALWAYS_INLINE totallyLess(SortCursorImpl & lhs, SortCursorImpl & rhs, int null_direction_hint) { /// The last row of left cursor is less than the current row of the right cursor. 
- int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), null_direction_hint); + int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint); return cmp < 0; } @@ -222,25 +235,136 @@ Chunk getRowFromChunk(const Chunk & chunk, size_t pos) return result; } -void inline addRange(PaddedPODArray & left_map, size_t start, size_t end) +void inline addRange(PaddedPODArray & values, UInt64 start, UInt64 end) { assert(end > start); - for (size_t i = start; i < end; ++i) - left_map.push_back(i); + for (UInt64 i = start; i < end; ++i) + values.push_back(i); } -void inline addMany(PaddedPODArray & left_or_right_map, size_t idx, size_t num) +void inline addMany(PaddedPODArray & values, UInt64 value, size_t num) { - for (size_t i = 0; i < num; ++i) - left_or_right_map.push_back(idx); + values.resize_fill(values.size() + num, value); } } -FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_) - : sample_block(materializeBlock(sample_block_).cloneEmpty()), desc(description_) +JoinKeyRow::JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos) { + row.reserve(cursor->sort_columns.size()); + for (const auto & col : cursor->sort_columns) + { + auto new_col = col->cloneEmpty(); + new_col->insertFrom(*col, pos); + row.push_back(std::move(new_col)); + } + if (const IColumn * asof_column = cursor.getAsofColumn()) + { + if (const auto * nullable_asof_column = checkAndGetColumn(asof_column)) + { + /// We save matched column, and since NULL do not match anything, we can't use it as a key + chassert(!nullable_asof_column->isNullAt(pos)); + asof_column = nullable_asof_column->getNestedColumnPtr().get(); + } + auto new_col = asof_column->cloneEmpty(); + new_col->insertFrom(*asof_column, pos); + row.push_back(std::move(new_col)); + } } +void JoinKeyRow::reset() +{ + row.clear(); +} + +bool JoinKeyRow::equals(const FullMergeJoinCursor & cursor) const +{ + if (row.empty()) + return false; + + for (size_t i = 0; i < cursor->sort_columns_size; ++i) + { + // int cmp = this->row[i]->compareAt(0, cursor->getRow(), *(cursor->sort_columns[i]), cursor->desc[i].nulls_direction); + int cmp = nullableCompareAt(*this->row[i], *cursor->sort_columns[i], 0, cursor->getRow(), cursor->desc[i].nulls_direction); + if (cmp != 0) + return false; + } + return true; +} + +bool JoinKeyRow::asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const +{ + chassert(this->row.size() == cursor->sort_columns_size + 1); + if (!equals(cursor)) + return false; + + const auto & asof_row = row.back(); + if (isNullAt(*asof_row, 0) || isNullAt(*cursor.getAsofColumn(), cursor->getRow())) + return false; + + int cmp = 0; + if (const auto * nullable_column = checkAndGetColumn(cursor.getAsofColumn())) + cmp = nullable_column->getNestedColumn().compareAt(cursor->getRow(), 0, *asof_row, 1); + else + cmp = cursor.getAsofColumn()->compareAt(cursor->getRow(), 0, *asof_row, 1); + + return (asof_inequality == ASOFJoinInequality::Less && cmp < 0) + || (asof_inequality == ASOFJoinInequality::LessOrEquals && cmp <= 0) + || (asof_inequality == ASOFJoinInequality::Greater && cmp > 0) + || (asof_inequality == ASOFJoinInequality::GreaterOrEquals && cmp >= 0); +} + +void AnyJoinState::set(size_t source_num, const FullMergeJoinCursor & cursor) +{ + assert(cursor->rows); + keys[source_num] = JoinKeyRow(cursor, cursor->rows - 1); +} + +void AnyJoinState::reset(size_t source_num) +{ + keys[source_num].reset(); + value.clear(); +} + +void 
AnyJoinState::setValue(Chunk value_) +{ + value = std::move(value_); +} + +bool AnyJoinState::empty() const { return keys[0].row.empty() && keys[1].row.empty(); } + + +void AsofJoinState::set(const FullMergeJoinCursor & rcursor, size_t rpos) +{ + key = JoinKeyRow(rcursor, rpos); + value = rcursor.getCurrent().clone(); + value_row = rpos; +} + +void AsofJoinState::reset() +{ + key.reset(); + value.clear(); +} + +FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof) + : sample_block(materializeBlock(sample_block_).cloneEmpty()) + , desc(description_) +{ + if (desc.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty sort description for FullMergeJoinCursor"); + + if (is_asof) + { + /// For ASOF join prefix of sort description is used for equality comparison + /// and the last column is used for inequality comparison and is handled separately + + auto asof_column_description = desc.back(); + desc.pop_back(); + + chassert(asof_column_description.direction == 1 && asof_column_description.nulls_direction == 1); + asof_column_position = sample_block.getPositionByName(asof_column_description.column_name); + } +} const Chunk & FullMergeJoinCursor::getCurrent() const { @@ -278,48 +402,103 @@ bool FullMergeJoinCursor::fullyCompleted() const return !cursor.isValid() && recieved_all_blocks; } +String FullMergeJoinCursor::dump() const +{ + Strings row_dump; + if (cursor.isValid()) + { + Field val; + for (size_t i = 0; i < cursor.sort_columns_size; ++i) + { + cursor.sort_columns[i]->get(cursor.getRow(), val); + row_dump.push_back(val.dump()); + } + + if (const auto * asof_column = getAsofColumn()) + { + asof_column->get(cursor.getRow(), val); + row_dump.push_back(val.dump()); + } + } + + return fmt::format("<{}/{}{}>[{}]", + cursor.getRow(), cursor.rows, + recieved_all_blocks ? 
"(finished)" : "", + fmt::join(row_dump, ", ")); +} + MergeJoinAlgorithm::MergeJoinAlgorithm( - JoinPtr table_join_, + JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, const Blocks & input_headers, size_t max_block_size_) - : table_join(table_join_) + : kind(kind_) + , strictness(strictness_) , max_block_size(max_block_size_) , log(getLogger("MergeJoinAlgorithm")) { if (input_headers.size() != 2) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs"); - auto strictness = table_join->getTableJoin().strictness(); - if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All) + if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All && strictness != JoinStrictness::Asof) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for strictness {}", strictness); - auto kind = table_join->getTableJoin().kind(); + if (strictness == JoinStrictness::Asof) + { + if (kind != JoinKind::Left && kind != JoinKind::Inner) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not implement ASOF {} join", kind); + } + if (!isInner(kind) && !isLeft(kind) && !isRight(kind) && !isFull(kind)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for kind {}", kind); - const auto & join_on = table_join->getTableJoin().getOnlyClause(); - - if (join_on.on_filter_condition_left || join_on.on_filter_condition_right) + if (on_clause_.on_filter_condition_left || on_clause_.on_filter_condition_right) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ON filter conditions"); cursors = { - createCursor(input_headers[0], join_on.key_names_left), - createCursor(input_headers[1], join_on.key_names_right) + createCursor(input_headers[0], on_clause_.key_names_left, strictness), + createCursor(input_headers[1], on_clause_.key_names_right, strictness), }; +} - for (const auto & [left_key, right_key] : table_join->getTableJoin().leftToRightKeyRemap()) +MergeJoinAlgorithm::MergeJoinAlgorithm( + JoinPtr join_ptr, + const Blocks & input_headers, + size_t max_block_size_) + : MergeJoinAlgorithm( + join_ptr->getTableJoin().kind(), + join_ptr->getTableJoin().strictness(), + join_ptr->getTableJoin().getOnlyClause(), + input_headers, + max_block_size_) +{ + for (const auto & [left_key, right_key] : join_ptr->getTableJoin().leftToRightKeyRemap()) { size_t left_idx = input_headers[0].getPositionByName(left_key); size_t right_idx = input_headers[1].getPositionByName(right_key); left_to_right_key_remap[left_idx] = right_idx; } - const auto *smjPtr = typeid_cast(table_join.get()); + const auto *smjPtr = typeid_cast(join_ptr.get()); if (smjPtr) { null_direction_hint = smjPtr->getNullDirection(); } + if (strictness == JoinStrictness::Asof) + setAsofInequality(join_ptr->getTableJoin().getAsofInequality()); +} + +void MergeJoinAlgorithm::setAsofInequality(ASOFJoinInequality asof_inequality_) +{ + if (strictness != JoinStrictness::Asof) + throw Exception(ErrorCodes::LOGICAL_ERROR, "setAsofInequality is only supported for ASOF joins"); + + if (asof_inequality_ == ASOFJoinInequality::None) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ASOF inequality cannot be None"); + + asof_inequality = asof_inequality_; } void MergeJoinAlgorithm::logElapsed(double seconds) @@ -407,7 +586,7 @@ struct AllJoinImpl size_t lnum = nextDistinct(left_cursor.cursor); size_t rnum = nextDistinct(right_cursor.cursor); - bool all_fit_in_block = 
std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size; + bool all_fit_in_block = !max_block_size || std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size; bool have_all_ranges = left_cursor.cursor.isValid() && right_cursor.cursor.isValid(); if (all_fit_in_block && have_all_ranges) { @@ -421,7 +600,7 @@ struct AllJoinImpl else { assert(state == nullptr); - state = std::make_unique(left_cursor.cursor, lpos, right_cursor.cursor, rpos); + state = std::make_unique(left_cursor, lpos, right_cursor, rpos); state->addRange(0, left_cursor.getCurrent().clone(), lpos, lnum); state->addRange(1, right_cursor.getCurrent().clone(), rpos, rnum); return; @@ -466,6 +645,17 @@ void dispatchKind(JoinKind kind, Args && ... args) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported join kind: \"{}\"", kind); } +MutableColumns MergeJoinAlgorithm::getEmptyResultColumns() const +{ + MutableColumns result_cols; + for (size_t i = 0; i < 2; ++i) + { + for (const auto & col : cursors[i]->sampleColumns()) + result_cols.push_back(col->cloneEmpty()); + } + return result_cols; +} + std::optional MergeJoinAlgorithm::handleAllJoinState() { if (all_join_state && all_join_state->finished()) @@ -479,7 +669,7 @@ std::optional MergeJoinAlgorithm::handleAllJoinState /// Accumulate blocks with same key in all_join_state for (size_t i = 0; i < 2; ++i) { - if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(cursors[i]->cursor)) + if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(*cursors[i])) { size_t pos = cursors[i]->cursor.getRow(); size_t num = nextDistinct(cursors[i]->cursor); @@ -499,15 +689,10 @@ std::optional MergeJoinAlgorithm::handleAllJoinState stat.max_blocks_loaded = std::max(stat.max_blocks_loaded, all_join_state->blocksStored()); /// join all rows with current key - MutableColumns result_cols; - for (size_t i = 0; i < 2; ++i) - { - for (const auto & col : cursors[i]->sampleColumns()) - result_cols.push_back(col->cloneEmpty()); - } + MutableColumns result_cols = getEmptyResultColumns(); size_t total_rows = 0; - while (total_rows < max_block_size) + while (!max_block_size || total_rows < max_block_size) { const auto & left_range = all_join_state->getLeft(); const auto & right_range = all_join_state->getRight(); @@ -532,7 +717,52 @@ std::optional MergeJoinAlgorithm::handleAllJoinState return {}; } -MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin(JoinKind kind) +std::optional MergeJoinAlgorithm::handleAsofJoinState() +{ + if (strictness != JoinStrictness::Asof) + return {}; + + if (!cursors[1]->fullyCompleted()) + return {}; + + auto & left_cursor = *cursors[0]; + const auto & left_columns = left_cursor.getCurrent().getColumns(); + + MutableColumns result_cols = getEmptyResultColumns(); + + while (left_cursor->isValid() && asof_join_state.hasMatch(left_cursor, asof_inequality)) + { + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, left_cursor->getRow()); + for (const auto & col : asof_join_state.value.getColumns()) + result_cols[i++]->insertFrom(*col, asof_join_state.value_row); + chassert(i == result_cols.size()); + left_cursor->next(); + } + + while (isLeft(kind) && left_cursor->isValid()) + { + /// return row with default values at right side + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, left_cursor->getRow()); + for (; i < result_cols.size(); ++i) + result_cols[i]->insertDefault(); + chassert(i == result_cols.size()); + + left_cursor->next(); + 
} + + size_t result_rows = result_cols.empty() ? 0 : result_cols.front()->size(); + if (result_rows) + return Status(Chunk(std::move(result_cols), result_rows)); + + return {}; +} + + +MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin() { PaddedPODArray idx_map[2]; @@ -595,7 +825,7 @@ struct AnyJoinImpl FullMergeJoinCursor & right_cursor, PaddedPODArray & left_map, PaddedPODArray & right_map, - AnyJoinState & state, + AnyJoinState & any_join_state, int null_direction_hint) { assert(enabled); @@ -656,21 +886,21 @@ struct AnyJoinImpl } } - /// Remember index of last joined row to propagate it to next block + /// Remember last joined row to propagate it to next block - state.setValue({}); + any_join_state.setValue({}); if (!left_cursor->isValid()) { - state.set(0, left_cursor.cursor); + any_join_state.set(0, left_cursor); if (cmp == 0 && isLeft(kind)) - state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos)); + any_join_state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos)); } if (!right_cursor->isValid()) { - state.set(1, right_cursor.cursor); + any_join_state.set(1, right_cursor); if (cmp == 0 && isRight(kind)) - state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos)); + any_join_state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos)); } } }; @@ -680,40 +910,34 @@ std::optional MergeJoinAlgorithm::handleAnyJoinState if (any_join_state.empty()) return {}; - auto kind = table_join->getTableJoin().kind(); - Chunk result; for (size_t source_num = 0; source_num < 2; ++source_num) { auto & current = *cursors[source_num]; - auto & state = any_join_state; - if (any_join_state.keys[source_num].equals(current.cursor)) + if (any_join_state.keys[source_num].equals(current)) { size_t start_pos = current->getRow(); size_t length = nextDistinct(current.cursor); if (length && isLeft(kind) && source_num == 0) { - if (state.value) - result = copyChunkResized(current.getCurrent(), state.value, start_pos, length); + if (any_join_state.value) + result = copyChunkResized(current.getCurrent(), any_join_state.value, start_pos, length); else result = createBlockWithDefaults(source_num, start_pos, length); } if (length && isRight(kind) && source_num == 1) { - if (state.value) - result = copyChunkResized(state.value, current.getCurrent(), start_pos, length); + if (any_join_state.value) + result = copyChunkResized(any_join_state.value, current.getCurrent(), start_pos, length); else result = createBlockWithDefaults(source_num, start_pos, length); } - /// We've found row with other key, no need to skip more rows with current key if (current->isValid()) - { - state.keys[source_num].reset(); - } + any_join_state.keys[source_num].reset(); } else { @@ -726,7 +950,7 @@ std::optional MergeJoinAlgorithm::handleAnyJoinState return {}; } -MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind) +MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin() { if (auto result = handleAnyJoinState()) return std::move(*result); @@ -771,10 +995,151 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind) return Status(std::move(result)); } + +MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() +{ + auto & left_cursor = *cursors[0]; + if (!left_cursor->isValid()) + return Status(0); + + auto & right_cursor = *cursors[1]; + if (!right_cursor->isValid()) + return Status(1); + + const auto & left_columns = left_cursor.getCurrent().getColumns(); + const auto & right_columns = right_cursor.getCurrent().getColumns(); + + MutableColumns result_cols = 
getEmptyResultColumns(); + + while (left_cursor->isValid() && right_cursor->isValid()) + { + auto lpos = left_cursor->getRow(); + auto rpos = right_cursor->getRow(); + auto cmp = compareCursors(*left_cursor, *right_cursor, null_direction_hint); + if (cmp == 0) + { + if (isNullAt(*left_cursor.getAsofColumn(), lpos)) + cmp = -1; + if (isNullAt(*right_cursor.getAsofColumn(), rpos)) + cmp = 1; + } + + if (cmp == 0) + { + auto asof_cmp = compareAsofCursors(left_cursor, right_cursor, null_direction_hint); + + if ((asof_inequality == ASOFJoinInequality::Less && asof_cmp <= -1) + || (asof_inequality == ASOFJoinInequality::LessOrEquals && asof_cmp <= 0)) + { + /// First row in right table that is greater (or equal) than current row in left table + /// matches asof join condition the best + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (const auto & col : right_columns) + result_cols[i++]->insertFrom(*col, rpos); + chassert(i == result_cols.size()); + + left_cursor->next(); + continue; + } + + if (asof_inequality == ASOFJoinInequality::Less || asof_inequality == ASOFJoinInequality::LessOrEquals) + { + /// Asof condition is not (yet) satisfied, skip row in right table + right_cursor->next(); + continue; + } + + if ((asof_inequality == ASOFJoinInequality::Greater && asof_cmp >= 1) + || (asof_inequality == ASOFJoinInequality::GreaterOrEquals && asof_cmp >= 0)) + { + /// condition is satisfied, remember this row and move next to try to find better match + asof_join_state.set(right_cursor, rpos); + right_cursor->next(); + continue; + } + + if (asof_inequality == ASOFJoinInequality::Greater || asof_inequality == ASOFJoinInequality::GreaterOrEquals) + { + /// Asof condition is not satisfied anymore, use last matched row from right table + if (asof_join_state.hasMatch(left_cursor, asof_inequality)) + { + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (const auto & col : asof_join_state.value.getColumns()) + result_cols[i++]->insertFrom(*col, asof_join_state.value_row); + chassert(i == result_cols.size()); + } + else + { + asof_join_state.reset(); + if (isLeft(kind)) + { + /// return row with default values at right side + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (; i < result_cols.size(); ++i) + result_cols[i]->insertDefault(); + chassert(i == result_cols.size()); + } + } + left_cursor->next(); + continue; + } + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "TODO: implement ASOF equality join"); + } + else if (cmp < 0) + { + if (asof_join_state.hasMatch(left_cursor, asof_inequality)) + { + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (const auto & col : asof_join_state.value.getColumns()) + result_cols[i++]->insertFrom(*col, asof_join_state.value_row); + chassert(i == result_cols.size()); + left_cursor->next(); + continue; + } + else + { + asof_join_state.reset(); + } + + /// no matches for rows in left table, just pass them through + size_t num = nextDistinct(*left_cursor); + + if (isLeft(kind) && num) + { + /// return them with default values at right side + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertRangeFrom(*col, lpos, num); + for (; i < result_cols.size(); ++i) + result_cols[i]->insertManyDefaults(num); + chassert(i == result_cols.size()); + } + } + else + { + /// skip rows in right table until we find match for current row in left 
table + nextDistinct(*right_cursor); + } + } + size_t num_rows = result_cols.empty() ? 0 : result_cols.front()->size(); + return Status(Chunk(std::move(result_cols), num_rows)); +} + + /// if `source_num == 0` get data from left cursor and fill defaults at right /// otherwise - vice versa Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const { + ColumnRawPtrs cols; { const auto & columns_left = source_num == 0 ? cursors[0]->getCurrent().getColumns() : cursors[0]->sampleColumns(); @@ -797,7 +1162,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t star cols.push_back(col.get()); } } - Chunk result_chunk; copyColumnsResized(cols, start, num_rows, result_chunk); return result_chunk; @@ -813,7 +1177,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num) IMergingAlgorithm::Status MergeJoinAlgorithm::merge() { - auto kind = table_join->getTableJoin().kind(); if (!cursors[0]->cursor.isValid() && !cursors[0]->fullyCompleted()) return Status(0); @@ -821,11 +1184,11 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() if (!cursors[1]->cursor.isValid() && !cursors[1]->fullyCompleted()) return Status(1); - if (auto result = handleAllJoinState()) - { return std::move(*result); - } + + if (auto result = handleAsofJoinState()) + return std::move(*result); if (cursors[0]->fullyCompleted() || cursors[1]->fullyCompleted()) { @@ -839,7 +1202,7 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() } /// check if blocks are not intersecting at all - if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0) + if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0 && strictness != JoinStrictness::Asof) { if (cmp < 0) { @@ -858,13 +1221,14 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() } } - auto strictness = table_join->getTableJoin().strictness(); - if (strictness == JoinStrictness::Any) - return anyJoin(kind); + return anyJoin(); if (strictness == JoinStrictness::All) - return allJoin(kind); + return allJoin(); + + if (strictness == JoinStrictness::Asof) + return asofJoin(); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported strictness '{}'", strictness); } @@ -883,9 +1247,26 @@ MergeJoinTransform::MergeJoinTransform( /* always_read_till_end_= */ false, /* empty_chunk_on_finish_= */ true, table_join, input_headers, max_block_size) - , log(getLogger("MergeJoinTransform")) { - LOG_TRACE(log, "Use MergeJoinTransform"); +} + +MergeJoinTransform::MergeJoinTransform( + JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, + const Blocks & input_headers, + const Block & output_header, + size_t max_block_size, + UInt64 limit_hint_) + : IMergingTransform( + input_headers, + output_header, + /* have_all_inputs_= */ true, + limit_hint_, + /* always_read_till_end_= */ false, + /* empty_chunk_on_finish_= */ true, + kind_, strictness_, on_clause_, input_headers, max_block_size) +{ } void MergeJoinTransform::onFinish() diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index 5ca6b076544..d37a0b9f3ae 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -19,6 +20,7 @@ #include #include #include +#include namespace Poco { class Logger; } @@ -35,57 +37,28 @@ using FullMergeJoinCursorPtr = 
std::unique_ptr; /// Used instead of storing previous block struct JoinKeyRow { - std::vector row; - JoinKeyRow() = default; - explicit JoinKeyRow(const SortCursorImpl & impl_, size_t pos) - { - row.reserve(impl_.sort_columns.size()); - for (const auto & col : impl_.sort_columns) - { - auto new_col = col->cloneEmpty(); - new_col->insertFrom(*col, pos); - row.push_back(std::move(new_col)); - } - } + JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos); - void reset() - { - row.clear(); - } + bool equals(const FullMergeJoinCursor & cursor) const; + bool asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const; - bool equals(const SortCursorImpl & impl) const - { - if (row.empty()) - return false; + void reset(); - assert(this->row.size() == impl.sort_columns_size); - for (size_t i = 0; i < impl.sort_columns_size; ++i) - { - int cmp = this->row[i]->compareAt(0, impl.getRow(), *impl.sort_columns[i], impl.desc[i].nulls_direction); - if (cmp != 0) - return false; - } - return true; - } + std::vector row; }; /// Remembers previous key if it was joined in previous block class AnyJoinState : boost::noncopyable { public: - AnyJoinState() = default; + void set(size_t source_num, const FullMergeJoinCursor & cursor); + void setValue(Chunk value_); - void set(size_t source_num, const SortCursorImpl & cursor) - { - assert(cursor.rows); - keys[source_num] = JoinKeyRow(cursor, cursor.rows - 1); - } + void reset(size_t source_num); - void setValue(Chunk value_) { value = std::move(value_); } - - bool empty() const { return keys[0].row.empty() && keys[1].row.empty(); } + bool empty() const; /// current keys JoinKeyRow keys[2]; @@ -118,8 +91,8 @@ public: Chunk chunk; }; - AllJoinState(const SortCursorImpl & lcursor, size_t lpos, - const SortCursorImpl & rcursor, size_t rpos) + AllJoinState(const FullMergeJoinCursor & lcursor, size_t lpos, + const FullMergeJoinCursor & rcursor, size_t rpos) : keys{JoinKeyRow(lcursor, lpos), JoinKeyRow(rcursor, rpos)} { } @@ -187,13 +160,32 @@ private: size_t ridx = 0; }; + +class AsofJoinState : boost::noncopyable +{ +public: + void set(const FullMergeJoinCursor & rcursor, size_t rpos); + void reset(); + + bool hasMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const + { + if (value.empty()) + return false; + return key.asofMatch(cursor, asof_inequality); + } + + JoinKeyRow key; + Chunk value; + size_t value_row = 0; +}; + /* * Wrapper for SortCursorImpl */ class FullMergeJoinCursor : boost::noncopyable { public: - explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_); + explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof = false); bool fullyCompleted() const; void setChunk(Chunk && chunk); @@ -203,17 +195,31 @@ public: SortCursorImpl * operator-> () { return &cursor; } const SortCursorImpl * operator-> () const { return &cursor; } + SortCursorImpl & operator* () { return cursor; } + const SortCursorImpl & operator* () const { return cursor; } + SortCursorImpl cursor; const Block & sampleBlock() const { return sample_block; } Columns sampleColumns() const { return sample_block.getColumns(); } + const IColumn * getAsofColumn() const + { + if (!asof_column_position) + return nullptr; + return cursor.all_columns[*asof_column_position]; + } + + String dump() const; + private: Block sample_block; SortDescription desc; Chunk current_chunk; bool recieved_all_blocks = false; + + std::optional asof_column_position; }; /* 
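The AsofJoinState/getAsofColumn() declarations above remember the best right-side candidate while both sorted streams are scanned. Below is a minimal, self-contained sketch of the candidate-tracking rule that asofJoin() applies for the GreaterOrEquals inequality, reduced to plain integers: it is not ClickHouse API, the join keys are assumed already equal, and the function and parameter names are hypothetical. Right rows arrive in ascending asof order, so the last row still satisfying `left >= right` is the best match; once the condition fails, no later row can satisfy it either.

#include <cstddef>
#include <optional>
#include <vector>

// Sketch only: find the best ASOF match for `t1.asof >= t2.asof` given right-side
// asof values sorted ascending within one equal-key group.
std::optional<size_t> findAsofGreaterOrEquals(long left_asof, const std::vector<long> & right_asof_sorted)
{
    std::optional<size_t> best;
    for (size_t i = 0; i < right_asof_sorted.size(); ++i)
    {
        if (left_asof >= right_asof_sorted[i])
            best = i;   // condition still holds: remember this row and keep scanning for a better one
        else
            break;      // sorted input: no later row can satisfy the inequality either
    }
    return best;        // nullopt -> emit defaults on the right side for LEFT, skip the row for INNER
}

For Less/LessOrEquals the loop in asofJoin() works the other way around: the first right row that is greater (or equal) than the current left row is already the best match, so no candidate needs to be remembered.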
@@ -223,22 +229,33 @@ private: class MergeJoinAlgorithm final : public IMergingAlgorithm { public: - explicit MergeJoinAlgorithm(JoinPtr table_join, const Blocks & input_headers, size_t max_block_size_); + MergeJoinAlgorithm(JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, + const Blocks & input_headers, + size_t max_block_size_); + + MergeJoinAlgorithm(JoinPtr join_ptr, const Blocks & input_headers, size_t max_block_size_); const char * getName() const override { return "MergeJoinAlgorithm"; } void initialize(Inputs inputs) override; void consume(Input & input, size_t source_num) override; Status merge() override; - void logElapsed(double seconds); + void setAsofInequality(ASOFJoinInequality asof_inequality_); + void logElapsed(double seconds); private: std::optional handleAnyJoinState(); - Status anyJoin(JoinKind kind); + Status anyJoin(); std::optional handleAllJoinState(); - Status allJoin(JoinKind kind); + Status allJoin(); + std::optional handleAsofJoinState(); + Status asofJoin(); + + MutableColumns getEmptyResultColumns() const; Chunk createBlockWithDefaults(size_t source_num); Chunk createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const; @@ -246,12 +263,15 @@ private: std::unordered_map left_to_right_key_remap; std::array cursors; + ASOFJoinInequality asof_inequality = ASOFJoinInequality::None; - /// Keep some state to make connection between data in different blocks + /// Keep some state to make handle data from different blocks AnyJoinState any_join_state; std::unique_ptr all_join_state; + AsofJoinState asof_join_state; - JoinPtr table_join; + JoinKind kind; + JoinStrictness strictness; size_t max_block_size; int null_direction_hint = 1; @@ -281,12 +301,21 @@ public: size_t max_block_size, UInt64 limit_hint = 0); + MergeJoinTransform( + JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, + const Blocks & input_headers, + const Block & output_header, + size_t max_block_size, + UInt64 limit_hint_ = 0); + String getName() const override { return "MergeJoinTransform"; } + void setAsofInequality(ASOFJoinInequality asof_inequality_) { algorithm.setAsofInequality(asof_inequality_); } + protected: void onFinish() override; - - LoggerPtr log; }; } diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index fc40c6894bb..ea9ebb0f96e 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -30,10 +30,10 @@ void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_ auto info = std::make_shared(); info->bucket_num = bucket; info->is_overflows = is_overflows; - info->chunks = std::make_unique(std::move(chunks)); + info->chunks = std::make_shared(std::move(chunks)); Chunk chunk; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); output.push(std::move(chunk)); } @@ -255,11 +255,10 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) if (!chunk.hasRows()) return; - const auto & info = chunk.getChunkInfo(); - if (!info) + if (chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in GroupingAggregatedTransform."); - if (const auto * agg_info = typeid_cast(info.get())) + if (auto agg_info = chunk.getChunkInfos().get()) { Int32 bucket = 
agg_info->bucket_num; bool is_overflows = agg_info->is_overflows; @@ -275,7 +274,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) last_bucket_number[input] = bucket; } } - else if (typeid_cast(info.get())) + else if (chunk.getChunkInfos().get()) { single_level_chunks.emplace_back(std::move(chunk)); } @@ -304,7 +303,11 @@ void GroupingAggregatedTransform::work() Int32 bucket = cur_block.info.bucket_num; auto chunk_info = std::make_shared(); chunk_info->bucket_num = bucket; - chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info))); + + auto chunk = Chunk(cur_block.getColumns(), cur_block.rows()); + chunk.getChunkInfos().add(std::move(chunk_info)); + + chunks_map[bucket].emplace_back(std::move(chunk)); } } } @@ -319,9 +322,7 @@ MergingAggregatedBucketTransform::MergingAggregatedBucketTransform( void MergingAggregatedBucketTransform::transform(Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - const auto * chunks_to_merge = typeid_cast(info.get()); - + auto chunks_to_merge = chunk.getChunkInfos().get(); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge."); @@ -330,11 +331,10 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) BlocksList blocks_list; for (auto & cur_chunk : *chunks_to_merge->chunks) { - const auto & cur_info = cur_chunk.getChunkInfo(); - if (!cur_info) + if (cur_chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform."); - if (const auto * agg_info = typeid_cast(cur_info.get())) + if (auto agg_info = cur_chunk.getChunkInfos().get()) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = agg_info->is_overflows; @@ -342,7 +342,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) blocks_list.emplace_back(std::move(block)); } - else if (typeid_cast(cur_info.get())) + else if (cur_chunk.getChunkInfos().get()) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = false; @@ -361,7 +361,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) res_info->is_overflows = chunks_to_merge->is_overflows; res_info->bucket_num = chunks_to_merge->bucket_num; res_info->chunk_num = chunks_to_merge->chunk_num; - chunk.setChunkInfo(std::move(res_info)); + chunk.getChunkInfos().add(std::move(res_info)); auto block = params->aggregator.mergeBlocks(blocks_list, params->final, is_cancelled); @@ -405,11 +405,7 @@ bool SortingAggregatedTransform::tryPushChunk() void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedTransform."); - - const auto * agg_info = typeid_cast(info.get()); + auto agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h index 77ee3034ffc..3a3c1bd9c1e 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h @@ -3,6 +3,7 @@ #include #include #include 
+#include #include #include #include @@ -142,9 +143,9 @@ private: void addChunk(Chunk chunk, size_t from_input); }; -struct ChunksToMerge : public ChunkInfo +struct ChunksToMerge : public ChunkInfoCloneable { - std::unique_ptr chunks; + std::shared_ptr chunks; Int32 bucket_num = -1; bool is_overflows = false; UInt64 chunk_num = 0; // chunk number in order of generation, used during memory bound merging to restore chunks order diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index ad723da7527..446e60a0b81 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -32,11 +32,10 @@ void MergingAggregatedTransform::consume(Chunk chunk) total_input_rows += input_rows; ++total_input_blocks; - const auto & info = chunk.getChunkInfo(); - if (!info) + if (chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedTransform."); - if (const auto * agg_info = typeid_cast(info.get())) + if (auto agg_info = chunk.getChunkInfos().get()) { /** If the remote servers used a two-level aggregation method, * then blocks will contain information about the number of the bucket. @@ -49,7 +48,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block)); } - else if (typeid_cast(info.get())) + else if (chunk.getChunkInfos().get()) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); block.info.is_overflows = false; @@ -89,7 +88,8 @@ Chunk MergingAggregatedTransform::generate() UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.setChunkInfo(std::move(info)); + + chunk.getChunkInfos().add(std::move(info)); return chunk; } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 0f433165f14..ee4dfa6a64e 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -10,20 +10,20 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header, header), squashing(header, min_block_size_rows, min_block_size_bytes) + Block header_, size_t min_block_size_rows, size_t min_block_size_bytes) + : IInflatingTransform(header_, header_) + , squashing(header_, min_block_size_rows, min_block_size_bytes) { } void PlanSquashingTransform::consume(Chunk chunk) { - if (Chunk current_chunk = squashing.add(std::move(chunk)); current_chunk.hasChunkInfo()) - squashed_chunk.swap(current_chunk); + squashed_chunk = squashing.add(std::move(chunk)); } Chunk PlanSquashingTransform::generate() { - if (!squashed_chunk.hasChunkInfo()) + if (!squashed_chunk) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); Chunk result_chunk; @@ -33,12 +33,11 @@ Chunk PlanSquashingTransform::generate() bool PlanSquashingTransform::canGenerate() { - return squashed_chunk.hasChunkInfo(); + return bool(squashed_chunk); } Chunk PlanSquashingTransform::getRemaining() { - Chunk current_chunk = squashing.flush(); - return current_chunk; + return squashing.flush(); } } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index 4ad2ec2d089..e6db245499e 100644 --- 
a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -10,7 +10,7 @@ class PlanSquashingTransform : public IInflatingTransform { public: PlanSquashingTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + Block header_, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "PlanSquashingTransform"; } @@ -23,7 +23,6 @@ protected: private: Squashing squashing; Chunk squashed_chunk; - Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/ScatterByPartitionTransform.cpp b/src/Processors/Transforms/ScatterByPartitionTransform.cpp index 6e3cdc0fda1..16d265c9bcb 100644 --- a/src/Processors/Transforms/ScatterByPartitionTransform.cpp +++ b/src/Processors/Transforms/ScatterByPartitionTransform.cpp @@ -109,7 +109,7 @@ void ScatterByPartitionTransform::generateOutputChunks() hash.reset(num_rows); for (const auto & column_number : key_columns) - columns[column_number]->updateWeakHash32(hash); + hash.update(columns[column_number]->getWeakHash32()); const auto & hash_data = hash.getData(); IColumn::Selector selector(num_rows); diff --git a/src/Processors/Transforms/SelectByIndicesTransform.h b/src/Processors/Transforms/SelectByIndicesTransform.h index 480ab1a0f61..b44f5a3203e 100644 --- a/src/Processors/Transforms/SelectByIndicesTransform.h +++ b/src/Processors/Transforms/SelectByIndicesTransform.h @@ -26,7 +26,7 @@ public: void transform(Chunk & chunk) override { size_t num_rows = chunk.getNumRows(); - const auto * select_final_indices_info = typeid_cast(chunk.getChunkInfo().get()); + auto select_final_indices_info = chunk.getChunkInfos().extract(); if (!select_final_indices_info || !select_final_indices_info->select_final_indices) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk passed to SelectByIndicesTransform without indices column"); @@ -41,7 +41,6 @@ public: chunk.setColumns(std::move(columns), index_column->size()); } - chunk.setChunkInfo(nullptr); } }; diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index b5a40c75c5b..490a57d4e23 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -1,13 +1,14 @@ +#include #include #include +#include "Processors/Chunk.h" namespace DB { namespace ErrorCodes { -extern const int LOGICAL_ERROR; -extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; + extern const int LOGICAL_ERROR; } SquashingTransform::SquashingTransform( @@ -19,9 +20,7 @@ SquashingTransform::SquashingTransform( void SquashingTransform::onConsume(Chunk chunk) { - Chunk planned_chunk = squashing.add(std::move(chunk)); - if (planned_chunk.hasChunkInfo()) - cur_chunk = DB::Squashing::squash(std::move(planned_chunk)); + cur_chunk = Squashing::squash(squashing.add(std::move(chunk))); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -34,10 +33,7 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - Chunk chunk = squashing.flush(); - if (chunk.hasChunkInfo()) - chunk = DB::Squashing::squash(std::move(chunk)); - finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); + finish_chunk = Squashing::squash(squashing.flush()); } void SquashingTransform::work() @@ -50,6 +46,7 @@ void SquashingTransform::work() } ExceptionKeepingTransform::work(); + if (finish_chunk) { data.chunk = std::move(finish_chunk); @@ -59,14 +56,14 @@ void 
SquashingTransform::work() SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes) + : IInflatingTransform(header, header) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } void SimpleSquashingChunksTransform::consume(Chunk chunk) { - Block current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); - squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); + squashed_chunk = Squashing::squash(squashing.add(std::move(chunk))); } Chunk SimpleSquashingChunksTransform::generate() @@ -74,7 +71,9 @@ Chunk SimpleSquashingChunksTransform::generate() if (squashed_chunk.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); - return std::move(squashed_chunk); + Chunk result; + result.swap(squashed_chunk); + return result; } bool SimpleSquashingChunksTransform::canGenerate() @@ -84,143 +83,7 @@ bool SimpleSquashingChunksTransform::canGenerate() Chunk SimpleSquashingChunksTransform::getRemaining() { - Block current_block = squashing.add({}); - squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); - return std::move(squashed_chunk); + return Squashing::squash(squashing.flush()); } -SquashingLegacy::SquashingLegacy(size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) - , min_block_size_bytes(min_block_size_bytes_) -{ -} - -Block SquashingLegacy::add(Block && input_block) -{ - return addImpl(std::move(input_block)); -} - -Block SquashingLegacy::add(const Block & input_block) -{ - return addImpl(input_block); -} - -/* - * To minimize copying, accept two types of argument: const reference for output - * stream, and rvalue reference for input stream, and decide whether to copy - * inside this function. This allows us not to copy Block unless we absolutely - * have to. - */ -template -Block SquashingLegacy::addImpl(ReferenceType input_block) -{ - /// End of input stream. - if (!input_block) - { - Block to_return; - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Just read block is already enough. - if (isEnoughSize(input_block)) - { - /// If no accumulated data, return just read block. - if (!accumulated_block) - { - return std::move(input_block); - } - - /// Return accumulated data (maybe it has small size) and place new block to accumulated data. - Block to_return = std::move(input_block); - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Accumulated block is already enough. - if (isEnoughSize(accumulated_block)) - { - /// Return accumulated data and place new block to accumulated data. - Block to_return = std::move(input_block); - std::swap(to_return, accumulated_block); - return to_return; - } - - append(std::move(input_block)); - if (isEnoughSize(accumulated_block)) - { - Block to_return; - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Squashed block is not ready. 
- return {}; -} - - -template -void SquashingLegacy::append(ReferenceType input_block) -{ - if (!accumulated_block) - { - accumulated_block = std::move(input_block); - return; - } - - assert(blocksHaveEqualStructure(input_block, accumulated_block)); - - try - { - for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) - { - const auto source_column = input_block.getByPosition(i).column; - - auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); - mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); - accumulated_block.getByPosition(i).column = std::move(mutable_column); - } - } - catch (...) - { - /// add() may be called again even after a previous add() threw an exception. - /// Keep accumulated_block in a valid state. - /// Seems ok to discard accumulated data because we're throwing an exception, which the caller will - /// hopefully interpret to mean "this block and all *previous* blocks are potentially lost". - accumulated_block.clear(); - throw; - } -} - - -bool SquashingLegacy::isEnoughSize(const Block & block) -{ - size_t rows = 0; - size_t bytes = 0; - - for (const auto & [column, type, name] : block) - { - if (!column) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid column in block."); - - if (!rows) - rows = column->size(); - else if (rows != column->size()) - throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Sizes of columns doesn't match"); - - bytes += column->byteSize(); - } - - return isEnoughSize(rows, bytes); -} - - -bool SquashingLegacy::isEnoughSize(size_t rows, size_t bytes) const -{ - return (!min_block_size_rows && !min_block_size_bytes) - || (min_block_size_rows && rows >= min_block_size_rows) - || (min_block_size_bytes && bytes >= min_block_size_bytes); -} - - } diff --git a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h index 452317e7d5e..092f58f2fe0 100644 --- a/src/Processors/Transforms/SquashingTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -30,35 +30,6 @@ private: Chunk finish_chunk; }; - -class SquashingLegacy -{ -public: - /// Conditions on rows and bytes are OR-ed. If one of them is zero, then corresponding condition is ignored. - SquashingLegacy(size_t min_block_size_rows_, size_t min_block_size_bytes_); - - /** Add next block and possibly returns squashed block. - * At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. 
- */ - Block add(Block && block); - Block add(const Block & block); - -private: - size_t min_block_size_rows; - size_t min_block_size_bytes; - - Block accumulated_block; - - template - Block addImpl(ReferenceType block); - - template - void append(ReferenceType block); - - bool isEnoughSize(const Block & block); - bool isEnoughSize(size_t rows, size_t bytes) const; -}; - class SimpleSquashingChunksTransform : public IInflatingTransform { public: @@ -73,7 +44,7 @@ protected: Chunk getRemaining() override; private: - SquashingLegacy squashing; + Squashing squashing; Chunk squashed_chunk; }; diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index aa86879e62c..567997c24ab 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -150,11 +151,7 @@ void TotalsHavingTransform::transform(Chunk & chunk) /// Block with values not included in `max_rows_to_group_by`. We'll postpone it. if (overflow_row) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in TotalsHavingTransform."); - - const auto * agg_info = typeid_cast(info.get()); + const auto & agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in TotalsHavingTransform."); diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index a694fa43e46..86421adf4fb 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2716,42 +2716,42 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) { return std::make_shared(name, argument_types, parameters); - }, properties}, AggregateFunctionFactory::CaseInsensitive); + }, properties}, AggregateFunctionFactory::Case::Insensitive); factory.registerFunction("dense_rank", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { return std::make_shared(name, argument_types, parameters); - }, properties}, AggregateFunctionFactory::CaseInsensitive); + }, properties}, AggregateFunctionFactory::Case::Insensitive); factory.registerFunction("percent_rank", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { return std::make_shared(name, argument_types, parameters); - }, properties}, AggregateFunctionFactory::CaseInsensitive); + }, properties}, AggregateFunctionFactory::Case::Insensitive); factory.registerFunction("row_number", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { return std::make_shared(name, argument_types, parameters); - }, properties}, AggregateFunctionFactory::CaseInsensitive); + }, properties}, AggregateFunctionFactory::Case::Insensitive); factory.registerFunction("ntile", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { return std::make_shared(name, argument_types, parameters); - }, properties}, AggregateFunctionFactory::CaseInsensitive); + }, properties}, AggregateFunctionFactory::Case::Insensitive); factory.registerFunction("nth_value", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { return std::make_shared( name, 
argument_types, parameters); - }, properties}, AggregateFunctionFactory::CaseInsensitive); + }, properties}, AggregateFunctionFactory::Case::Insensitive); factory.registerFunction("lagInFrame", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 25fbf13b0e7..98d66ed77c3 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -5,7 +5,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -16,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -24,9 +27,13 @@ #include #include #include +#include +#include +#include #include #include +#include namespace ProfileEvents @@ -105,7 +112,7 @@ private: class ExecutingInnerQueryFromViewTransform final : public ExceptionKeepingTransform { public: - ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_); + ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_, bool disable_deduplication_for_children_); String getName() const override { return "ExecutingInnerQueryFromView"; } @@ -116,6 +123,7 @@ protected: private: ViewsDataPtr views_data; ViewRuntimeData & view; + bool disable_deduplication_for_children; struct State { @@ -138,7 +146,7 @@ class PushingToLiveViewSink final : public SinkToStorage public: PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToLiveViewSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; private: StorageLiveView & live_view; @@ -152,7 +160,7 @@ class PushingToWindowViewSink final : public SinkToStorage public: PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToWindowViewSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; private: StorageWindowView & window_view; @@ -216,45 +224,10 @@ std::optional generateViewChain( const auto & insert_settings = insert_context->getSettingsRef(); - // Do not deduplicate insertions into MV if the main insertion is Ok if (disable_deduplication_for_children) { insert_context->setSetting("insert_deduplicate", Field{false}); } - else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && - !insert_settings.insert_deduplication_token.value.empty()) - { - /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle - * deduplication in complex INSERT flows. - * - * Example: - * - * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 - * | | - * └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ - * - * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will - * be inserted into `ds_2_1`. - * - * We are forced to use view id instead of table id because there are some possible INSERT flows where no tables - * are involved. 
- * - * Example: - * - * landing -┬--> mv_1_1 --┬-> ds_1_1 - * | | - * └--> mv_1_2 --┘ - * - */ - auto insert_deduplication_token = insert_settings.insert_deduplication_token.value; - - if (view_id.hasUUID()) - insert_deduplication_token += "_" + toString(view_id.uuid); - else - insert_deduplication_token += "_" + view_id.getFullNameNotQuoted(); - - insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); - } // Processing of blocks for MVs is done block by block, and there will // be no parallel reading after (plus it is not a costless operation) @@ -361,7 +334,13 @@ std::optional generateViewChain( insert_columns.emplace_back(column.name); } - InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false); + InterpreterInsertQuery interpreter( + nullptr, + insert_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; @@ -378,6 +357,10 @@ std::optional generateViewChain( table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); } +#ifdef ABORT_ON_LOGICAL_ERROR + out.addSource(std::make_shared("Before squashing", out.getInputHeader())); +#endif + auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); counting->setProgressCallback(insert_context->getProgressCallback()); @@ -420,11 +403,19 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { +#ifdef ABORT_ON_LOGICAL_ERROR + out.addSource(std::make_shared("Right after Inner query", out.getInputHeader())); +#endif + auto executing_inner_query = std::make_shared( - storage_header, views_data->views.back(), views_data); + storage_header, views_data->views.back(), views_data, disable_deduplication_for_children); executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); out.addSource(std::move(executing_inner_query)); + +#ifdef ABORT_ON_LOGICAL_ERROR + out.addSource(std::make_shared("Right before Inner query", out.getInputHeader())); +#endif } return out; @@ -465,11 +456,7 @@ Chain buildPushingToViewsChain( */ result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); - /// If the "root" table deduplicates blocks, there are no need to make deduplication for children - /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks - bool disable_deduplication_for_children = false; - if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) - disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); + bool disable_deduplication_for_children = !context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views; auto table_id = storage->getStorageID(); auto views = DatabaseCatalog::instance().getDependentViews(table_id); @@ -560,12 +547,25 @@ Chain buildPushingToViewsChain( auto sink = std::make_shared(live_view_header, *live_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); + + 
result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { auto sink = std::make_shared(window_view->getInputHeader(), *window_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + } + else if (dynamic_cast(storage.get())) + { + auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); + metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); + sink->setRuntimeData(thread_status, elapsed_counter_ms); + result_chain.addSource(std::move(sink)); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) @@ -573,8 +573,15 @@ Chain buildPushingToViewsChain( auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); + + result_chain.addSource(std::make_shared(sink->getHeader())); + result_chain.addSource(std::move(sink)); } + else + { + result_chain.addSource(std::make_shared(storage_header)); + } if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); @@ -590,7 +597,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection && chunk_infos, bool disable_deduplication_for_children) { const auto & context = view.context; @@ -637,6 +644,19 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); + pipeline.addTransform(std::make_shared(std::move(chunk_infos), pipeline.getHeader())); + + if (!disable_deduplication_for_children) + { + String materialize_view_id = view.table_id.hasUUID() ? 
toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted(); + pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); + pipeline.addTransform(std::make_shared(pipeline.getHeader())); + } + else + { + pipeline.addTransform(std::make_shared(pipeline.getHeader())); + } + return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -728,17 +748,19 @@ IProcessor::Status CopyingDataToViewsTransform::prepare() ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( const Block & header, ViewRuntimeData & view_, - std::shared_ptr views_data_) + std::shared_ptr views_data_, + bool disable_deduplication_for_children_) : ExceptionKeepingTransform(header, view_.sample_block) , views_data(std::move(views_data_)) , view(view_) + , disable_deduplication_for_children(disable_deduplication_for_children_) { } void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { - auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); - state.emplace(process(block, view, *views_data)); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); + state.emplace(process(std::move(block), view, *views_data, std::move(chunk.getChunkInfos()), disable_deduplication_for_children)); } @@ -770,10 +792,10 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi { } -void PushingToLiveViewSink::consume(Chunk chunk) +void PushingToLiveViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(getHeader().cloneWithColumns(chunk.detachColumns()), context); + live_view.writeBlock(live_view, getHeader().cloneWithColumns(chunk.getColumns()), std::move(chunk.getChunkInfos()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -793,11 +815,11 @@ PushingToWindowViewSink::PushingToWindowViewSink( { } -void PushingToWindowViewSink::consume(Chunk chunk) +void PushingToWindowViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); + window_view, getHeader().cloneWithColumns(chunk.getColumns()), std::move(chunk.getChunkInfos()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp index a2a42f27c3f..c33e87815b1 100644 --- a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp +++ b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp @@ -14,7 +14,8 @@ #include #include #include "IO/CompressionMethod.h" -#include "Parsers/ASTLiteral.h" +#include +#include namespace DB diff --git a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp index ce5992c2548..364d7c69071 100644 --- a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp +++ b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp @@ -50,7 +50,7 @@ TEST(Processors, PortsNotConnected) processors->emplace_back(std::move(source)); processors->emplace_back(std::move(sink)); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD try { QueryStatusPtr element; diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp new 
file mode 100644 index 00000000000..f678d7984e8 --- /dev/null +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -0,0 +1,768 @@ +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + + +#include + +using namespace DB; + +namespace +{ + +QueryPipeline buildJoinPipeline( + std::shared_ptr left_source, + std::shared_ptr right_source, + size_t key_length = 1, + JoinKind kind = JoinKind::Inner, + JoinStrictness strictness = JoinStrictness::All, + ASOFJoinInequality asof_inequality = ASOFJoinInequality::None) +{ + Blocks inputs; + inputs.emplace_back(left_source->getPort().getHeader()); + inputs.emplace_back(right_source->getPort().getHeader()); + + Block out_header; + for (const auto & input : inputs) + { + for (ColumnWithTypeAndName column : input) + { + if (&input == &inputs.front()) + column.name = "t1." + column.name; + else + column.name = "t2." + column.name; + out_header.insert(column); + } + } + + TableJoin::JoinOnClause on_clause; + for (size_t i = 0; i < key_length; ++i) + { + on_clause.key_names_left.emplace_back(inputs[0].getByPosition(i).name); + on_clause.key_names_right.emplace_back(inputs[1].getByPosition(i).name); + } + + auto joining = std::make_shared( + kind, + strictness, + on_clause, + inputs, out_header, /* max_block_size = */ 0); + + if (asof_inequality != ASOFJoinInequality::None) + joining->setAsofInequality(asof_inequality); + + chassert(joining->getInputs().size() == 2); + + connect(left_source->getPort(), joining->getInputs().front()); + connect(right_source->getPort(), joining->getInputs().back()); + + auto * output_port = &joining->getOutputPort(); + + auto processors = std::make_shared(); + processors->emplace_back(std::move(left_source)); + processors->emplace_back(std::move(right_source)); + processors->emplace_back(std::move(joining)); + + QueryPipeline pipeline(QueryPlanResourceHolder{}, processors, output_port); + return pipeline; +} + + +std::shared_ptr oneColumnSource(const std::vector> & values) +{ + Block header = { + ColumnWithTypeAndName(std::make_shared(), "key"), + ColumnWithTypeAndName(std::make_shared(), "idx"), + }; + + UInt64 idx = 0; + Chunks chunks; + for (const auto & chunk_values : values) + { + auto key_column = ColumnUInt64::create(); + auto idx_column = ColumnUInt64::create(); + + for (auto n : chunk_values) + { + key_column->insertValue(n); + idx_column->insertValue(idx); + ++idx; + } + chunks.emplace_back(Chunk(Columns{std::move(key_column), std::move(idx_column)}, chunk_values.size())); + } + return std::make_shared(header, std::move(chunks)); +} + +class SourceChunksBuilder +{ +public: + + explicit SourceChunksBuilder(const Block & header_) + : header(header_) + { + current_chunk = header.cloneEmptyColumns(); + chassert(!current_chunk.empty()); + } + + void setBreakProbability(pcg64 & rng_) + { + /// random probability with possibility to have exact 0.0 and 1.0 values + break_prob = std::uniform_int_distribution(0, 5)(rng_) / static_cast(5); + rng = &rng_; + } + + void addRow(const std::vector & row) + { + chassert(row.size() == current_chunk.size()); + for (size_t i = 0; i < current_chunk.size(); ++i) + current_chunk[i]->insert(row[i]); + + if (rng && std::uniform_real_distribution<>(0.0, 1.0)(*rng) < break_prob) + addChunk(); + } + + void addChunk() + { + if (current_chunk.front()->empty()) + return; + + size_t rows = current_chunk.front()->size(); + 
chunks.emplace_back(std::move(current_chunk), rows); + current_chunk = header.cloneEmptyColumns(); + } + + std::shared_ptr getSource() + { + addChunk(); + + /// copy chunk to allow reusing same builder + Chunks chunks_copy; + chunks_copy.reserve(chunks.size()); + for (const auto & chunk : chunks) + chunks_copy.emplace_back(chunk.clone()); + return std::make_shared(header, std::move(chunks_copy)); + } + +private: + Block header; + Chunks chunks; + MutableColumns current_chunk; + + pcg64 * rng = nullptr; + double break_prob = 0.0; +}; + + +std::vector> getValuesFromBlock(const Block & block, const Names & names) +{ + std::vector> result; + for (size_t i = 0; i < block.rows(); ++i) + { + auto & row = result.emplace_back(); + for (const auto & name : names) + block.getByName(name).column->get(i, row.emplace_back()); + } + return result; +} + + +Block executePipeline(QueryPipeline && pipeline) +{ + PullingPipelineExecutor executor(pipeline); + + Blocks result_blocks; + while (true) + { + Block block; + bool is_ok = executor.pull(block); + if (!is_ok) + break; + result_blocks.emplace_back(std::move(block)); + } + + return concatenateBlocks(result_blocks); +} + +template +void assertColumnVectorEq(const typename ColumnVector::Container & expected, const Block & block, const std::string & name) +{ + const auto * actual = typeid_cast *>(block.getByName(name).column.get()); + ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << "expected: " << typeid(ColumnVector).name(); + + auto get_first_diff = [&]() -> String + { + const auto & actual_data = actual->getData(); + size_t num_rows = std::min(expected.size(), actual_data.size()); + for (size_t i = 0; i < num_rows; ++i) + { + if (expected[i] != actual_data[i]) + return fmt::format(", expected: {}, actual: {} at row {}", expected[i], actual_data[i], i); + } + return ""; + }; + + EXPECT_EQ(actual->getData().size(), expected.size()); + ASSERT_EQ(actual->getData(), expected) << "column name: " << name << get_first_diff(); +} + +template +void assertColumnEq(const IColumn & expected, const Block & block, const std::string & name) +{ + const ColumnPtr & actual = block.getByName(name).column; + ASSERT_TRUE(checkColumn(*actual)); + ASSERT_TRUE(checkColumn(expected)); + EXPECT_EQ(actual->size(), expected.size()); + + auto dump_val = [](const IColumn & col, size_t i) -> String + { + Field value; + col.get(i, value); + return value.dump(); + }; + + size_t num_rows = std::min(actual->size(), expected.size()); + for (size_t i = 0; i < num_rows; ++i) + ASSERT_EQ(actual->compareAt(i, i, expected, 1), 0) << dump_val(*actual, i) << " != " << dump_val(expected, i) << " at row " << i; +} + +template +T getRandomFrom(pcg64 & rng, const std::initializer_list & opts) +{ + std::vector options(opts.begin(), opts.end()); + size_t idx = std::uniform_int_distribution(0, options.size() - 1)(rng); + return options[idx]; +} + +void generateNextKey(pcg64 & rng, UInt64 & k1, String & k2) +{ + size_t str_len = std::uniform_int_distribution<>(1, 10)(rng); + String new_k2 = getRandomASCIIString(str_len, rng); + if (new_k2.compare(k2) <= 0) + ++k1; + k2 = new_k2; +} + +bool isStrict(ASOFJoinInequality inequality) +{ + return inequality == ASOFJoinInequality::Less || inequality == ASOFJoinInequality::Greater; +} + +} + +class FullSortingJoinTest : public ::testing::Test +{ +public: + FullSortingJoinTest() = default; + + void SetUp() override + { + Poco::AutoPtr channel(new Poco::ConsoleChannel(std::cerr)); + 
Poco::Logger::root().setChannel(channel); + if (const char * test_log_level = std::getenv("TEST_LOG_LEVEL")) // NOLINT(concurrency-mt-unsafe) + Poco::Logger::root().setLevel(test_log_level); + else + Poco::Logger::root().setLevel("none"); + + + UInt64 seed = randomSeed(); + if (const char * random_seed = std::getenv("TEST_RANDOM_SEED")) // NOLINT(concurrency-mt-unsafe) + seed = std::stoull(random_seed); + std::cout << "TEST_RANDOM_SEED=" << seed << std::endl; + rng = pcg64(seed); + } + + void TearDown() override + { + } + + pcg64 rng; +}; + +TEST_F(FullSortingJoinTest, AllAnyOneKey) +try +{ + { + SCOPED_TRACE("Inner All"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {1, 2, 3, 4, 5} }), + oneColumnSource({ {1}, {2}, {3}, {4}, {5} }), + 1, JoinKind::Inner, JoinStrictness::All)); + + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {1, 2, 3, 4, 5} }), + oneColumnSource({ {1}, {2}, {3}, {4}, {5} }), + 1, JoinKind::Inner, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner All"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }), + oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }), + 1, JoinKind::Inner, JoinStrictness::All)); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 0, 1, 2, 3, 3, 4, 5}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({3, 3, 3, 4, 4, 4, 3, 4, 5, 5}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }), + oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }), + 1, JoinKind::Inner, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 4}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({3, 5}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }), + oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }), + 1, JoinKind::Inner, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 4}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({3, 5}), result, "t2.idx"); + } + { + + SCOPED_TRACE("Left Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }), + oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }), + 1, JoinKind::Left, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx"); + } + { + SCOPED_TRACE("Left Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }), + oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }), + 1, JoinKind::Left, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx"); + } +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + + +TEST_F(FullSortingJoinTest, AnySimple) +try +{ + 
JoinKind kind = getRandomFrom(rng, {JoinKind::Inner, JoinKind::Left, JoinKind::Right}); + + SourceChunksBuilder left_source({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "attr"}, + }); + + SourceChunksBuilder right_source({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "attr"}, + }); + + left_source.setBreakProbability(rng); + right_source.setBreakProbability(rng); + + size_t num_keys = std::uniform_int_distribution<>(100, 1000)(rng); + + auto expected_left = ColumnString::create(); + auto expected_right = ColumnString::create(); + + UInt64 k1 = 1; + String k2; + + auto get_attr = [&](const String & side, size_t idx) -> String + { + return toString(k1) + "_" + k2 + "_" + side + "_" + toString(idx); + }; + + for (size_t i = 0; i < num_keys; ++i) + { + generateNextKey(rng, k1, k2); + + /// Key is present in left, right or both tables. Both tables is more probable. + size_t key_presence = std::uniform_int_distribution<>(0, 10)(rng); + + size_t num_rows_left = key_presence == 0 ? 0 : std::uniform_int_distribution<>(1, 10)(rng); + for (size_t j = 0; j < num_rows_left; ++j) + left_source.addRow({k1, k2, get_attr("left", j)}); + + size_t num_rows_right = key_presence == 1 ? 0 : std::uniform_int_distribution<>(1, 10)(rng); + for (size_t j = 0; j < num_rows_right; ++j) + right_source.addRow({k1, k2, get_attr("right", j)}); + + String left_attr = num_rows_left ? get_attr("left", 0) : ""; + String right_attr = num_rows_right ? get_attr("right", 0) : ""; + + if (kind == JoinKind::Inner && num_rows_left && num_rows_right) + { + expected_left->insert(left_attr); + expected_right->insert(right_attr); + } + else if (kind == JoinKind::Left) + { + for (size_t j = 0; j < num_rows_left; ++j) + { + expected_left->insert(get_attr("left", j)); + expected_right->insert(right_attr); + } + } + else if (kind == JoinKind::Right) + { + for (size_t j = 0; j < num_rows_right; ++j) + { + expected_left->insert(left_attr); + expected_right->insert(get_attr("right", j)); + } + } + } + + Block result_block = executePipeline(buildJoinPipeline( + left_source.getSource(), right_source.getSource(), /* key_length = */ 2, + kind, JoinStrictness::Any)); + assertColumnEq(*expected_left, result_block, "t1.attr"); + assertColumnEq(*expected_right, result_block, "t2.attr"); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + +TEST_F(FullSortingJoinTest, AsofSimple) +try +{ + SourceChunksBuilder left_source({ + {std::make_shared(), "key"}, + {std::make_shared(), "t"}, + }); + left_source.addRow({"AMZN", 3}); + left_source.addRow({"AMZN", 4}); + left_source.addRow({"AMZN", 6}); + left_source.addRow({"SBUX", 10}); + + SourceChunksBuilder right_source({ + {std::make_shared(), "key"}, + {std::make_shared(), "t"}, + {std::make_shared(), "value"}, + }); + right_source.addRow({"AAPL", 1, 97}); + right_source.addChunk(); + right_source.addRow({"AAPL", 2, 98}); + right_source.addRow({"AAPL", 3, 99}); + right_source.addRow({"AMZN", 1, 100}); + right_source.addRow({"AMZN", 2, 110}); + right_source.addChunk(); + right_source.addRow({"AMZN", 2, 110}); + right_source.addChunk(); + right_source.addRow({"AMZN", 4, 130}); + right_source.addRow({"AMZN", 5, 140}); + right_source.addRow({"SBUX", 8, 180}); + right_source.addChunk(); + right_source.addRow({"SBUX", 9, 190}); + + { + Block result_block = executePipeline(buildJoinPipeline( + left_source.getSource(), right_source.getSource(), /* key_length = */ 2, + JoinKind::Inner, 
JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals)); + auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); + + ASSERT_EQ(values, (std::vector>{ + {"AMZN", 3u, 4u, 130u}, + {"AMZN", 4u, 4u, 130u}, + })); + } + + { + Block result_block = executePipeline(buildJoinPipeline( + left_source.getSource(), right_source.getSource(), /* key_length = */ 2, + JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::GreaterOrEquals)); + auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); + + ASSERT_EQ(values, (std::vector>{ + {"AMZN", 3u, 2u, 110u}, + {"AMZN", 4u, 4u, 130u}, + {"AMZN", 6u, 5u, 140u}, + {"SBUX", 10u, 9u, 190u}, + })); + } +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + + +TEST_F(FullSortingJoinTest, AsofOnlyColumn) +try +{ + auto left_source = oneColumnSource({ {3}, {3, 3, 3}, {3, 5, 5, 6}, {9, 9}, {10, 20} }); + + SourceChunksBuilder right_source_builder({ + {std::make_shared(), "t"}, + {std::make_shared(), "value"}, + }); + + right_source_builder.setBreakProbability(rng); + + for (const auto & row : std::vector>{ {1, 101}, {2, 102}, {4, 104}, {5, 105}, {11, 111}, {15, 115} }) + right_source_builder.addRow(row); + + auto right_source = right_source_builder.getSource(); + + auto pipeline = buildJoinPipeline( + left_source, right_source, /* key_length = */ 1, + JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals); + + Block result_block = executePipeline(std::move(pipeline)); + + ASSERT_EQ( + assert_cast(result_block.getByName("t1.key").column.get())->getData(), + (ColumnUInt64::Container{3, 3, 3, 3, 3, 5, 5, 6, 9, 9, 10}) + ); + + ASSERT_EQ( + assert_cast(result_block.getByName("t2.t").column.get())->getData(), + (ColumnUInt64::Container{4, 4, 4, 4, 4, 5, 5, 11, 11, 11, 11}) + ); + + ASSERT_EQ( + assert_cast(result_block.getByName("t2.value").column.get())->getData(), + (ColumnUInt64::Container{104, 104, 104, 104, 104, 105, 105, 111, 111, 111, 111}) + ); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + +TEST_F(FullSortingJoinTest, AsofLessGeneratedTestData) +try +{ + /// Generate data random and build expected result at the same time. 
+ + /// Test specific combinations of join kind and inequality per each run + auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left }); + auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals }); + + SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality)); + + /// Key is complex, `k1, k2` for equality and `t` for asof + SourceChunksBuilder left_source_builder({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "t"}, + {std::make_shared(), "attr"}, + }); + + SourceChunksBuilder right_source_builder({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "t"}, + {std::make_shared(), "attr"}, + }); + + /// How small generated block should be + left_source_builder.setBreakProbability(rng); + right_source_builder.setBreakProbability(rng); + + /// We are going to generate sorted data and remember expected result + ColumnInt64::Container expected; + + UInt64 k1 = 1; + String k2; + auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); + for (size_t key_num = 0; key_num < key_num_total; ++key_num) + { + /// Generate new key greater than previous + generateNextKey(rng, k1, k2); + + Int64 left_t = 0; + /// Generate several rows for the key + size_t num_left_rows = std::uniform_int_distribution<>(1, 100)(rng); + for (size_t i = 0; i < num_left_rows; ++i) + { + /// t is strictly greater than previous + left_t += std::uniform_int_distribution<>(1, 10)(rng); + + auto left_arrtibute_value = 10 * left_t; + left_source_builder.addRow({k1, k2, left_t, left_arrtibute_value}); + expected.push_back(left_arrtibute_value); + + auto num_matches = 1 + std::poisson_distribution<>(4)(rng); + /// Generate several matches in the right table + auto right_t = left_t; + for (size_t j = 0; j < num_matches; ++j) + { + int min_step = isStrict(asof_inequality) ? 1 : 0; + right_t += std::uniform_int_distribution<>(min_step, 3)(rng); + + /// First row should match + bool is_match = j == 0; + right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * left_arrtibute_value : -1}); + } + /// Next left_t should be greater than right_t not to match with previous rows + left_t = right_t; + } + + /// generate some rows with greater left_t to check that they are not matched + num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0; + for (size_t i = 0; i < num_left_rows; ++i) + { + left_t += std::uniform_int_distribution<>(1, 10)(rng); + left_source_builder.addRow({k1, k2, left_t, -10 * left_t}); + + if (join_kind == JoinKind::Left) + expected.push_back(-10 * left_t); + } + } + + Block result_block = executePipeline(buildJoinPipeline( + left_source_builder.getSource(), right_source_builder.getSource(), + /* key_length = */ 3, + join_kind, JoinStrictness::Asof, asof_inequality)); + + assertColumnVectorEq(expected, result_block, "t1.attr"); + + for (auto & e : expected) + /// Non matched rows from left table have negative attr + /// Value if attribute in right table is 10 times greater than in left table + e = e < 0 ? 0 : 10 * e; + + assertColumnVectorEq(expected, result_block, "t2.attr"); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + +TEST_F(FullSortingJoinTest, AsofGreaterGeneratedTestData) +try +{ + /// Generate data random and build expected result at the same time. 
+ + /// Test specific combinations of join kind and inequality per each run + auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left }); + auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals }); + + SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality)); + + SourceChunksBuilder left_source_builder({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "t"}, + {std::make_shared(), "attr"}, + }); + + SourceChunksBuilder right_source_builder({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "t"}, + {std::make_shared(), "attr"}, + }); + + left_source_builder.setBreakProbability(rng); + right_source_builder.setBreakProbability(rng); + + ColumnInt64::Container expected; + + UInt64 k1 = 1; + String k2; + UInt64 left_t = 0; + + auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); + for (size_t key_num = 0; key_num < key_num_total; ++key_num) + { + /// Generate new key greater than previous + generateNextKey(rng, k1, k2); + + /// Generate some rows with smaller left_t to check that they are not matched + size_t num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 100)(rng) : 0; + for (size_t i = 0; i < num_left_rows; ++i) + { + left_t += std::uniform_int_distribution<>(1, 10)(rng); + left_source_builder.addRow({k1, k2, left_t, -10 * left_t}); + + if (join_kind == JoinKind::Left) + expected.push_back(-10 * left_t); + } + + if (std::bernoulli_distribution(0.1)(rng)) + continue; + + size_t num_right_matches = std::uniform_int_distribution<>(1, 10)(rng); + auto right_t = left_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 0 : 1, 10)(rng); + auto attribute_value = 10 * right_t; + for (size_t j = 0; j < num_right_matches; ++j) + { + right_t += std::uniform_int_distribution<>(0, 3)(rng); + bool is_match = j == num_right_matches - 1; + right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * attribute_value : -1}); + } + + /// Next left_t should be greater than (or equals) right_t to match with previous rows + left_t = right_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 1 : 0, 100)(rng); + size_t num_left_matches = std::uniform_int_distribution<>(1, 100)(rng); + for (size_t j = 0; j < num_left_matches; ++j) + { + left_t += std::uniform_int_distribution<>(0, 3)(rng); + left_source_builder.addRow({k1, k2, left_t, attribute_value}); + expected.push_back(attribute_value); + } + } + + Block result_block = executePipeline(buildJoinPipeline( + left_source_builder.getSource(), right_source_builder.getSource(), + /* key_length = */ 3, + join_kind, JoinStrictness::Asof, asof_inequality)); + + assertColumnVectorEq(expected, result_block, "t1.attr"); + + for (auto & e : expected) + /// Non matched rows from left table have negative attr + /// Value if attribute in right table is 10 times greater than in left table + e = e < 0 ? 
0 : 10 * e; + + assertColumnVectorEq(expected, result_block, "t2.attr"); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index f0b2ead687e..a9e5b1535c0 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -193,7 +193,7 @@ public: return concurrency_control; } - void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); } + void addResources(QueryPlanResourceHolder resources_) { resources.append(std::move(resources_)); } void setQueryIdHolder(std::shared_ptr query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); } void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); } diff --git a/src/QueryPipeline/QueryPlanResourceHolder.cpp b/src/QueryPipeline/QueryPlanResourceHolder.cpp index 2cd4dc42a83..bb2be2c8ffb 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.cpp +++ b/src/QueryPipeline/QueryPlanResourceHolder.cpp @@ -5,7 +5,7 @@ namespace DB { -QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept +QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolder && rhs) noexcept { table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); @@ -16,6 +16,12 @@ QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHo return *this; } +QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept +{ + append(std::move(rhs)); + return *this; +} + QueryPlanResourceHolder::QueryPlanResourceHolder() = default; QueryPlanResourceHolder::QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept = default; QueryPlanResourceHolder::~QueryPlanResourceHolder() = default; diff --git a/src/QueryPipeline/QueryPlanResourceHolder.h b/src/QueryPipeline/QueryPlanResourceHolder.h index ed9eb68b7ba..10f7f39ab09 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.h +++ b/src/QueryPipeline/QueryPlanResourceHolder.h @@ -20,8 +20,11 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); + QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete; + /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs. QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &&) noexcept; + QueryPlanResourceHolder & append(QueryPlanResourceHolder &&) noexcept; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. 
/// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 14457d2df43..b08f2002f64 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -907,4 +908,10 @@ void RemoteQueryExecutor::setProfileInfoCallback(ProfileInfoCallback callback) std::lock_guard guard(was_cancelled_mutex); profile_info_callback = std::move(callback); } + +bool RemoteQueryExecutor::needToSkipUnavailableShard() const +{ + return context->getSettingsRef().skip_unavailable_shards && (0 == connections->size()); +} + } diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index 6b1539bd08e..04a59cc3b7e 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -218,7 +218,7 @@ public: IConnections & getConnections() { return *connections; } - bool needToSkipUnavailableShard() const { return context->getSettingsRef().skip_unavailable_shards && (0 == connections->size()); } + bool needToSkipUnavailableShard() const; bool isReplicaUnavailable() const { return extension && extension->parallel_reading_coordinator && connections->size() == 0; } diff --git a/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp index c8ab2e3a973..34bc2eb2b5e 100644 --- a/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp +++ b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp @@ -133,7 +133,7 @@ TEST(CheckSortedTransform, CheckBadLastRow) EXPECT_NO_THROW(executor.pull(chunk)); EXPECT_NO_THROW(executor.pull(chunk)); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } @@ -158,7 +158,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock1) Chunk chunk; -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } @@ -181,7 +181,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock2) PullingPipelineExecutor executor(pipeline); Chunk chunk; -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } @@ -204,7 +204,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock3) PullingPipelineExecutor executor(pipeline); Chunk chunk; -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index cb36df1efc0..d8a4d7f0e1f 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -1082,8 +1083,8 @@ namespace read_buffer = wrapReadBufferWithCompressionMethod(std::move(read_buffer), input_compression_method); assert(!pipeline); - auto source = query_context->getInputFormat( - input_format, *read_buffer, header, query_context->getSettings().max_insert_block_size); + auto source + = query_context->getInputFormat(input_format, *read_buffer, header, query_context->getSettingsRef().max_insert_block_size); pipeline = std::make_unique(std::move(source)); pipeline_executor = std::make_unique(*pipeline); @@ -1151,8 +1152,7 @@ namespace external_table_context->applySettingsChanges(settings_changes); } auto in = 
external_table_context->getInputFormat( - format, *buf, metadata_snapshot->getSampleBlock(), - external_table_context->getSettings().max_insert_block_size); + format, *buf, metadata_snapshot->getSampleBlock(), external_table_context->getSettingsRef().max_insert_block_size); QueryPipelineBuilder cur_pipeline; cur_pipeline.init(Pipe(std::move(in))); diff --git a/src/Server/HTTP/sendExceptionToHTTPClient.cpp b/src/Server/HTTP/sendExceptionToHTTPClient.cpp index 78650758e35..022a763a9a2 100644 --- a/src/Server/HTTP/sendExceptionToHTTPClient.cpp +++ b/src/Server/HTTP/sendExceptionToHTTPClient.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.cpp b/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.cpp index d42bd77e339..48a3b39a3e7 100644 --- a/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.cpp +++ b/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.cpp @@ -1,5 +1,6 @@ #include +#include #include #include diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 370af79e456..c8a58527f2c 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 45c28babe3a..e46021c8e68 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp index c973be040c8..1972c3263b2 100644 --- a/src/Server/KeeperReadinessHandler.cpp +++ b/src/Server/KeeperReadinessHandler.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 5f26542d39c..b61df45133a 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -3,6 +3,7 @@ #if USE_NURAFT # include +# include # include # include # include @@ -621,8 +622,9 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response { LOG_INFO( log, - "Total time to process a request took too long ({}ms).\nRequest info: {}", - elapsed, + "Total time to process a request in session {} took too long ({}ms).\nRequest info: {}", + session_id, + elapsed_ms, request->toString(/*short_format=*/true)); } diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index c0f015bfcd5..b6d795b1e69 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index 2f891ebf810..f632ef51c00 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -8,6 +8,7 @@ #include #include #include +#include #include "IO/ReadBufferFromString.h" #include "IServer.h" diff --git a/src/Server/NotFoundHandler.cpp b/src/Server/NotFoundHandler.cpp index 38f56921c89..304021ee8dc 100644 --- a/src/Server/NotFoundHandler.cpp +++ b/src/Server/NotFoundHandler.cpp @@ -1,6 +1,7 @@ #include #include +#include #include namespace DB diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index 8ba8421e6f0..cde31b4c58a 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #if USE_SSL # include 
diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 1f3e038a1f5..87c106c3fc0 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -30,15 +30,8 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe response.setContentType("text/plain; version=0.0.4; charset=UTF-8"); WriteBufferFromHTTPServerResponse wb(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout, write_event); - try - { - metrics_writer->write(wb); - wb.finalize(); - } - catch (...) - { - wb.finalize(); - } + metrics_writer->write(wb); + wb.finalize(); } catch (...) { diff --git a/src/Server/ProxyV1Handler.cpp b/src/Server/ProxyV1Handler.cpp index d5e6ab23360..5b33a119cef 100644 --- a/src/Server/ProxyV1Handler.cpp +++ b/src/Server/ProxyV1Handler.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index 67823117758..f43357db0a8 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -7,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp index 331b7a84857..f3981dea9fb 100644 --- a/src/Server/StaticRequestHandler.cpp +++ b/src/Server/StaticRequestHandler.cpp @@ -4,13 +4,14 @@ #include "HTTPHandlerFactory.h" #include "HTTPResponseHeaderWriter.h" +#include #include #include #include -#include #include -#include +#include #include +#include #include diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 8d69df8de76..5bc2d09df35 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -666,7 +666,7 @@ void TCPHandler::runImpl() // Server should die on std logic errors in debug, like with assert() // or ErrorCodes::LOGICAL_ERROR. This helps catch these errors in // tests. 
-#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD catch (const std::logic_error & e) { state.io.onException(); @@ -888,12 +888,11 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { - squashing.header = state.block_for_insert; - auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); - if (planned_chunk.hasChunkInfo()) + squashing.setHeader(state.block_for_insert.cloneEmpty()); + auto result_chunk = Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); + if (result_chunk) { - Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = state.block_for_insert.cloneWithColumns(result_chunk.getColumns()); + auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -902,12 +901,13 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - auto planned_chunk = squashing.flush(); - Chunk result_chunk; - if (planned_chunk.hasChunkInfo()) - result_chunk = DB::Squashing::squash(std::move(planned_chunk)); + Chunk result_chunk = Squashing::squash(squashing.flush()); + if (!result_chunk) + { + return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context); + } - auto result = squashing.header.cloneWithColumns(result_chunk.getColumns()); + auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index 68d3ff0b325..a3d098014e7 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -5,8 +5,9 @@ #include #include -#include +#include #include +#include #include #include diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 35a5e95e643..7891042bb96 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -33,9 +34,9 @@ #include #include #include +#include #include #include -#include #include diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index c07583cd39d..da749812167 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -701,15 +701,15 @@ std::optional ColumnsDescription::tryGetColumn(const GetColumns auto jt = subcolumns.get<0>().find(column_name); if (jt != subcolumns.get<0>().end()) return *jt; - } - /// Check for dynamic subcolumns. - auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); - it = columns.get<1>().find(ordinary_column_name); - if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) - { - if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) - return NameAndTypePair(ordinary_column_name, dynamic_subcolumn_name, it->type, dynamic_subcolumn_type); + /// Check for dynamic subcolumns. 
+ auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); + it = columns.get<1>().find(ordinary_column_name); + if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) + return NameAndTypePair(ordinary_column_name, dynamic_subcolumn_name, it->type, dynamic_subcolumn_type); + } } return {}; diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index e556bda2561..b2ce62caf0a 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include @@ -134,7 +135,7 @@ DistributedSink::DistributedSink( } -void DistributedSink::consume(Chunk chunk) +void DistributedSink::consume(Chunk & chunk) { if (is_first_chunk) { @@ -142,7 +143,7 @@ void DistributedSink::consume(Chunk chunk) is_first_chunk = false; } - auto ordinary_block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto ordinary_block = getHeader().cloneWithColumns(chunk.getColumns()); if (insert_sync) writeSync(ordinary_block); @@ -420,7 +421,13 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// to resolve tables (in InterpreterInsertQuery::getTable()) auto copy_query_ast = query_ast->clone(); - InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized); + InterpreterInsertQuery interp( + copy_query_ast, + job.local_context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interp.execute(); job.pipeline = std::move(block_io.pipeline); @@ -715,7 +722,13 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const try { - InterpreterInsertQuery interp(query_ast, context, allow_materialized); + InterpreterInsertQuery interp( + query_ast, + context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interp.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index a4c95633595..5b7396f2c6f 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -49,7 +49,7 @@ public: const Names & columns_to_send_); String getName() const override { return "DistributedSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: diff --git a/src/Storages/FileLog/FileLogSettings.cpp b/src/Storages/FileLog/FileLogSettings.cpp index 8e245285b9a..cf2d6b64d7c 100644 --- a/src/Storages/FileLog/FileLogSettings.cpp +++ b/src/Storages/FileLog/FileLogSettings.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index abd4b4ce23b..0f9bd8b6ff9 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -740,7 +740,14 @@ bool StorageFileLog::streamToViews() auto new_context = Context::createCopy(getContext()); - InterpreterInsertQuery interpreter(insert, new_context, false, true, true); + InterpreterInsertQuery interpreter( + insert, + new_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); + auto block_io = 
interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 28d8128e052..9063437c065 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -321,7 +321,7 @@ public: Chunk generateChunkFromMetadata() { - size_t num_rows = std::min(current_file_remained_rows, UInt64(getContext()->getSettings().max_block_size)); + size_t num_rows = std::min(current_file_remained_rows, UInt64(getContext()->getSettingsRef().max_block_size)); current_file_remained_rows -= num_rows; Block source_block; @@ -964,7 +964,7 @@ HiveFiles StorageHive::collectHiveFiles( /// Hive files to collect HiveFiles hive_files; Int64 hit_parttions_num = 0; - Int64 hive_max_query_partitions = context_->getSettings().max_partitions_to_read; + Int64 hive_max_query_partitions = context_->getSettingsRef().max_partitions_to_read; /// Mutext to protect hive_files, which maybe appended in multiple threads std::mutex hive_files_mutex; ThreadPool pool{CurrentMetrics::StorageHiveThreads, CurrentMetrics::StorageHiveThreadsActive, CurrentMetrics::StorageHiveThreadsScheduled, max_threads}; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 1f7ac23eb82..3b9c6ff28fa 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp index 9c5b29ae265..63467603d16 100644 --- a/src/Storages/IStorageCluster.cpp +++ b/src/Storages/IStorageCluster.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Storages/IStorage_fwd.h b/src/Storages/IStorage_fwd.h index b9243b029b0..4cbc586a745 100644 --- a/src/Storages/IStorage_fwd.h +++ b/src/Storages/IStorage_fwd.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -9,9 +10,10 @@ namespace DB { class IStorage; +struct SnapshotDetachedTable; using ConstStoragePtr = std::shared_ptr; using StoragePtr = std::shared_ptr; using Tables = std::map; - +using SnapshotDetachedTables = std::map; } diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index f5c5d093ce1..809401bb279 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1099,7 +1099,13 @@ bool StorageKafka::streamToViews() // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true); + InterpreterInsertQuery interpreter( + insert, + kafka_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream diff --git a/src/Storages/LiveView/LiveViewSink.h b/src/Storages/LiveView/LiveViewSink.h index 792133ced64..9803fa0a160 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -71,9 +71,9 @@ public: new_hash.reset(); } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); block.updateHash(*new_hash); new_blocks->push_back(std::move(block)); } diff --git 
a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 57a1ea302f9..71b1a0a73c9 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -21,12 +21,14 @@ limitations under the License. */ #include #include #include +#include #include #include #include #include #include #include +#include #include #include @@ -330,7 +332,7 @@ Pipe StorageLiveView::watch( return reader; } -void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) +void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) { auto output = std::make_shared(*this); @@ -407,6 +409,21 @@ void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) builder = interpreter.buildQueryPipeline(); } + builder.addSimpleTransform([&](const Block & cur_header) + { + return std::make_shared(chunk_infos.clone(), cur_header); + }); + + String live_view_id = live_view.getStorageID().hasUUID() ? toString(live_view.getStorageID().uuid) : live_view.getStorageID().getFullNameNotQuoted(); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(live_view_id, stream_header); + }); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header); + }); + builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 91daac32c7b..12d8e898347 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -118,7 +118,7 @@ public: return 0; } - void writeBlock(const Block & block, ContextPtr context); + void writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); void refresh(); diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index bc8cb0ce69a..13394690227 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -356,7 +357,7 @@ void RefreshTask::refreshTask() stop_requested = true; tryLogCurrentException(log, "Unexpected exception in refresh scheduling, please investigate. 
The view will be stopped."); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD abortOnFailedAssertion("Unexpected exception in refresh scheduling"); #endif } @@ -377,7 +378,13 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptr +#include #include #include #include @@ -54,7 +55,7 @@ ApproximateNearestNeighborInformation::Metric stringToMetric(std::string_view me ApproximateNearestNeighborCondition::ApproximateNearestNeighborCondition(const SelectQueryInfo & query_info, ContextPtr context) : block_with_constants(KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context)) , index_granularity(context->getMergeTreeSettings().index_granularity) - , max_limit_for_ann_queries(context->getSettings().max_limit_for_ann_queries) + , max_limit_for_ann_queries(context->getSettingsRef().max_limit_for_ann_queries) , index_is_useful(checkQueryStructure(query_info)) {} diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp index 9d64592ed64..6606b00e287 100644 --- a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp +++ b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp @@ -1,7 +1,8 @@ +#include +#include +#include #include #include -#include -#include #include diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index b7fdcc63e1b..8e73021d3e7 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index c2e0e778220..c2f87018872 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -651,15 +652,12 @@ size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const return checksum->second.file_size; } -String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(bool with_subcolumns) const +String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(const NamesAndTypesList & available_columns) const { - auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns(with_subcolumns); - auto columns_list = columns_description.get(options); - std::optional minimum_size_column; UInt64 minimum_size = std::numeric_limits::max(); - for (const auto & column : columns_list) + for (const auto & column : available_columns) { if (!hasColumnFiles(column)) continue; @@ -2339,21 +2337,26 @@ String IMergeTreeDataPart::getUniqueId() const return getDataPartStorage().getUniqueId(); } +UInt128 IMergeTreeDataPart::getPartBlockIDHash() const +{ + SipHash hash; + checksums.computeTotalChecksumDataOnly(hash); + return hash.get128(); +} + String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const { if (info.level != 0) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get block id for non zero level part {}", name); - SipHash hash; if (token.empty()) { - checksums.computeTotalChecksumDataOnly(hash); - } - else - { - hash.update(token.data(), token.size()); + const auto hash_value = getPartBlockIDHash(); + return info.partition_id + "_" + toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); } + SipHash hash; + hash.update(token.data(), token.size()); const auto hash_value = hash.get128(); return info.partition_id + "_" + 
toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 571f1389e10..85ef0472ce7 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -196,7 +196,9 @@ public: /// Returns the name of a column with minimum compressed size (as returned by getColumnSize()). /// If no checksums are present returns the name of the first physically existing column. - String getColumnNameWithMinimumCompressedSize(bool with_subcolumns) const; + /// We pass a list of available columns since the ones available in the current storage snapshot might be smaller + /// than the one the table has (e.g a DROP COLUMN happened) and we don't want to get a column not in the snapshot + String getColumnNameWithMinimumCompressedSize(const NamesAndTypesList & available_columns) const; bool contains(const IMergeTreeDataPart & other) const { return info.contains(other.info); } @@ -210,6 +212,7 @@ public: /// Compute part block id for zero level part. Otherwise throws an exception. /// If token is not empty, block id is calculated based on it instead of block data + UInt128 getPartBlockIDHash() const; String getZeroLevelPartBlockID(std::string_view token) const; void setName(const String & new_name); diff --git a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h index ccc88079daa..7d4fb1df1c2 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h @@ -47,7 +47,7 @@ public: virtual std::optional getColumnPosition(const String & column_name) const = 0; - virtual String getColumnNameWithMinimumCompressedSize(bool with_subcolumns) const = 0; + virtual String getColumnNameWithMinimumCompressedSize(const NamesAndTypesList & available_columns) const = 0; virtual const MergeTreeDataPartChecksums & getChecksums() const = 0; diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index d9e9a433827..adaa4eddb98 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -1,11 +1,10 @@ #pragma once +#include #include -#include #include #include #include -#include #include #include @@ -13,6 +12,9 @@ namespace DB { +struct MergeTreeSettings; +using MergeTreeSettingsPtr = std::shared_ptr; + Block getBlockAndPermute(const Block & block, const Names & names, const IColumn::Permutation * permutation); Block permuteBlockIfNeeded(const Block & block, const IColumn::Permutation * permutation); diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index 89c813ab233..70e838e666a 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index a9b058418ea..cfcfb177e05 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -1,16 +1,18 @@ #pragma once #include -#include -#include -#include #include #include +#include +#include #include namespace DB { +struct MergeTreeSettings; +using MergeTreeSettingsPtr = std::shared_ptr; + class IMergedBlockOutputStream { 
public: diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index d38001a0feb..40eef8e7431 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h index f5111ccaacc..aff1cf0edb0 100644 --- a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h +++ b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h @@ -36,7 +36,10 @@ public: AlterConversionsPtr getAlterConversions() const override { return alter_conversions; } - String getColumnNameWithMinimumCompressedSize(bool with_subcolumns) const override { return data_part->getColumnNameWithMinimumCompressedSize(with_subcolumns); } + String getColumnNameWithMinimumCompressedSize(const NamesAndTypesList & available_columns) const override + { + return data_part->getColumnNameWithMinimumCompressedSize(available_columns); + } const MergeTreeDataPartChecksums & getChecksums() const override { return data_part->checksums; } diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 79efb0ca8b3..4c947487d21 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index c8f1a08128b..fc64fae9a58 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -16,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -422,6 +424,16 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() return false; } +bool MergeTask::enabledBlockNumberColumn(GlobalRuntimeContextPtr global_ctx) +{ + return global_ctx->data->getSettings()->enable_block_number_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty(); +} + +bool MergeTask::enabledBlockOffsetColumn(GlobalRuntimeContextPtr global_ctx) +{ + return global_ctx->data->getSettings()->enable_block_offset_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty(); +} + void MergeTask::addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const String & name, const DataTypePtr & type) { if (global_ctx->storage_columns.contains(name)) diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 56909d1b7a0..8b0f2130e8e 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -408,15 +408,8 @@ private: Stages::const_iterator stages_iterator = stages.begin(); - static bool enabledBlockNumberColumn(GlobalRuntimeContextPtr global_ctx) - { - return global_ctx->data->getSettings()->enable_block_number_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty(); - } - - static bool enabledBlockOffsetColumn(GlobalRuntimeContextPtr global_ctx) - { - return global_ctx->data->getSettings()->enable_block_offset_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty(); - } + static bool enabledBlockNumberColumn(GlobalRuntimeContextPtr global_ctx); + static bool enabledBlockOffsetColumn(GlobalRuntimeContextPtr global_ctx); static void addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const String & 
name, const DataTypePtr & type); }; diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index 8cb0badc19b..4a4deb07eee 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -155,6 +155,10 @@ void printExceptionWithRespectToAbort(LoggerPtr log, const String & query_id) { std::rethrow_exception(ex); } + catch (const TestException &) // NOLINT + { + /// Exception from a unit test, ignore it. + } catch (const Exception & e) { NOEXCEPT_SCOPE({ diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 570387a7046..aaa4ecd8eee 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -127,7 +127,8 @@ NameSet injectRequiredColumns( */ if (!have_at_least_one_physical_column) { - const auto minimum_size_column_name = data_part_info_for_reader.getColumnNameWithMinimumCompressedSize(with_subcolumns); + auto available_columns = storage_snapshot->metadata->getColumns().get(options); + const auto minimum_size_column_name = data_part_info_for_reader.getColumnNameWithMinimumCompressedSize(available_columns); columns.push_back(minimum_size_column_name); /// correctly report added column injected_columns.insert(columns.back()); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e31f6db5409..faf2741a456 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,7 @@ #include #include #include +#include #include #include #include @@ -474,7 +476,7 @@ StoragePolicyPtr MergeTreeData::getStoragePolicy() const ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByPredicate( const StorageSnapshotPtr & storage_snapshot, const ActionsDAGPtr & filter_dag, ContextPtr local_context) const { - if (!local_context->getSettings().allow_statistics_optimize) + if (!local_context->getSettingsRef().allow_statistics_optimize) return {}; const auto & parts = assert_cast(*storage_snapshot->data).parts; @@ -1142,7 +1144,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( auto metadata_snapshot = getInMemoryMetadataPtr(); auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]}); - auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr); + auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr, /*allow_non_deterministic_functions=*/ false); if (!filter_dag) return {}; @@ -6173,6 +6175,11 @@ bool MergeTreeData::hasProjection() const return false; } +bool MergeTreeData::areAsynchronousInsertsEnabled() const +{ + return getSettings()->async_insert; +} + MergeTreeData::ProjectionPartsVector MergeTreeData::getAllProjectionPartsVector(MergeTreeData::DataPartStateVector * out_states) const { ProjectionPartsVector res; @@ -7394,6 +7401,13 @@ std::pair MergeTreeData::cloneAn return std::make_pair(dst_data_part, std::move(temporary_directory_lock)); } +bool MergeTreeData::canUseAdaptiveGranularity() const +{ + const auto settings = getSettings(); + return settings->index_granularity_bytes != 0 + && (settings->enable_mixed_granularity_parts || !has_non_adaptive_index_granularity_parts); 
+} + String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const { return disk->getPath() + relative_data_path; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c8b721038c6..7076b680521 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -65,6 +64,7 @@ using BackupEntries = std::vector; +struct MergeTreeSettings; struct WriteSettings; /// Auxiliary struct holding information about the future merged or mutated part. @@ -441,7 +441,7 @@ public: bool hasProjection() const override; - bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; } + bool areAsynchronousInsertsEnabled() const override; bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override; @@ -860,12 +860,7 @@ public: /// Returns true if table can create new parts with adaptive granularity /// Has additional constraint in replicated version - virtual bool canUseAdaptiveGranularity() const - { - const auto settings = getSettings(); - return settings->index_granularity_bytes != 0 && - (settings->enable_mixed_granularity_parts || !has_non_adaptive_index_granularity_parts); - } + virtual bool canUseAdaptiveGranularity() const; /// Get constant pointer to storage settings. /// Copy this pointer into your scope and you will @@ -1735,14 +1730,6 @@ struct CurrentlySubmergingEmergingTagger ~CurrentlySubmergingEmergingTagger(); }; - -/// TODO: move it somewhere -[[ maybe_unused ]] static bool needSyncPart(size_t input_rows, size_t input_bytes, const MergeTreeSettings & settings) -{ - return ((settings.min_rows_to_fsync_after_merge && input_rows >= settings.min_rows_to_fsync_after_merge) - || (settings.min_compressed_bytes_to_fsync_after_merge && input_bytes >= settings.min_compressed_bytes_to_fsync_after_merge)); -} - /// Look at MutationCommands if it contains mutations for AlterConversions, update the counter. /// Return true if the counter had been updated bool updateAlterConversionsMutations(const MutationCommands & commands, std::atomic & alter_conversions_mutations, bool remove); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 2d49e1df19b..140a226f2d1 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -117,11 +118,11 @@ UInt64 MergeTreeDataMergerMutator::getMaxSourcePartSizeForMutation() const occupied >= data_settings->max_number_of_mutations_for_replica) return 0; - /// DataPart can be store only at one disk. Get maximum reservable free space at all disks. + /// A DataPart can be stored only at a single disk. Get the maximum reservable free space at all disks. UInt64 disk_space = data.getStoragePolicy()->getMaxUnreservedFreeSpace(); auto max_tasks_count = data.getContext()->getMergeMutateExecutor()->getMaxTasksCount(); - /// Allow mutations only if there are enough threads, leave free threads for merges else + /// Allow mutations only if there are enough threads, otherwise, leave free threads for merges. 
if (occupied <= 1 || max_tasks_count - occupied >= data_settings->number_of_free_entries_in_pool_to_execute_mutation) return static_cast(disk_space / DISK_USAGE_COEFFICIENT_TO_RESERVE); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index ee1a9b7f8ed..5bab523a9f1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -266,10 +266,13 @@ void MergeTreeDataPartWide::doCheckConsistency(bool require_part_metadata) const bool MergeTreeDataPartWide::hasColumnFiles(const NameAndTypePair & column) const { + auto serialization = tryGetSerialization(column.name); + if (!serialization) + return false; auto marks_file_extension = index_granularity_info.mark_type.getFileExtension(); bool res = true; - getSerialization(column.name)->enumerateStreams([&](const auto & substream_path) + serialization->enumerateStreams([&](const auto & substream_path) { auto stream_name = getStreamNameForColumn(column, substream_path, checksums); if (!stream_name || !checksums.files.contains(*stream_name + marks_file_extension)) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 5c9191dbb54..6dc7e649b06 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 74ea89a8864..3fbabe1dd52 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 2e287ff3042..c5214f0f3d3 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,7 @@ #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 5c8aa32949d..ee3ac4207cc 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -12,12 +12,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include @@ -313,6 +315,12 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( for (size_t i = 0; i < async_insert_info_with_partition.size(); ++i) { + if (async_insert_info_with_partition[i] == nullptr) + { + LOG_ERROR(getLogger("MergeTreeDataWriter"), "The {}th element in async_insert_info_with_partition is nullptr. There are {} partitions in the insert in total. 
Selector content is {}", + i, partitions_count, fmt::join(selector.begin(), selector.end(), ",")); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error for async deduplicated insert, please check error logs"); + } result[i].offsets = std::move(async_insert_info_with_partition[i]->offsets); result[i].tokens = std::move(async_insert_info_with_partition[i]->tokens); } diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.cpp b/src/Storages/MergeTree/MergeTreeIOSettings.cpp new file mode 100644 index 00000000000..58c3bd28d6a --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIOSettings.cpp @@ -0,0 +1,37 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +MergeTreeWriterSettings::MergeTreeWriterSettings( + const Settings & global_settings, + const WriteSettings & query_write_settings_, + const MergeTreeSettingsPtr & storage_settings, + bool can_use_adaptive_granularity_, + bool rewrite_primary_key_, + bool blocks_are_granules_size_) + : min_compress_block_size( + storage_settings->min_compress_block_size ? storage_settings->min_compress_block_size : global_settings.min_compress_block_size) + , max_compress_block_size( + storage_settings->max_compress_block_size ? storage_settings->max_compress_block_size : global_settings.max_compress_block_size) + , marks_compression_codec(storage_settings->marks_compression_codec) + , marks_compress_block_size(storage_settings->marks_compress_block_size) + , compress_primary_key(storage_settings->compress_primary_key) + , primary_key_compression_codec(storage_settings->primary_key_compression_codec) + , primary_key_compress_block_size(storage_settings->primary_key_compress_block_size) + , can_use_adaptive_granularity(can_use_adaptive_granularity_) + , rewrite_primary_key(rewrite_primary_key_) + , blocks_are_granules_size(blocks_are_granules_size_) + , query_write_settings(query_write_settings_) + , max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation) + , low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size) + , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part != 0) + , use_compact_variant_discriminators_serialization(storage_settings->use_compact_variant_discriminators_serialization) +{ +} + +} diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index 04171656fcf..c79ca1e66ee 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -1,15 +1,17 @@ #pragma once #include -#include -#include -#include #include #include +#include namespace DB { +struct MergeTreeSettings; +using MergeTreeSettingsPtr = std::shared_ptr; +struct Settings; + class MMappedFileCache; using MMappedFileCachePtr = std::shared_ptr; @@ -58,27 +60,7 @@ struct MergeTreeWriterSettings const MergeTreeSettingsPtr & storage_settings, bool can_use_adaptive_granularity_, bool rewrite_primary_key_, - bool blocks_are_granules_size_ = false) - : min_compress_block_size( - storage_settings->min_compress_block_size ? storage_settings->min_compress_block_size : global_settings.min_compress_block_size) - , max_compress_block_size( - storage_settings->max_compress_block_size ? 
storage_settings->max_compress_block_size - : global_settings.max_compress_block_size) - , marks_compression_codec(storage_settings->marks_compression_codec) - , marks_compress_block_size(storage_settings->marks_compress_block_size) - , compress_primary_key(storage_settings->compress_primary_key) - , primary_key_compression_codec(storage_settings->primary_key_compression_codec) - , primary_key_compress_block_size(storage_settings->primary_key_compress_block_size) - , can_use_adaptive_granularity(can_use_adaptive_granularity_) - , rewrite_primary_key(rewrite_primary_key_) - , blocks_are_granules_size(blocks_are_granules_size_) - , query_write_settings(query_write_settings_) - , max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation) - , low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size) - , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part != 0) - , use_compact_variant_discriminators_serialization(storage_settings->use_compact_variant_discriminators_serialization) - { - } + bool blocks_are_granules_size_ = false); size_t min_compress_block_size; size_t max_compress_block_size; diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index e492ca0aec2..c66aa2373e7 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -230,7 +231,7 @@ MergeTreeIndexConditionAnnoy::MergeTreeIndexConditionAnnoy( ContextPtr context) : ann_condition(query, context) , distance_function(distance_function_) - , search_k(context->getSettings().annoy_index_search_k_nodes) + , search_k(context->getSettingsRef().annoy_index_search_k_nodes) {} bool MergeTreeIndexConditionAnnoy::mayBeTrueOnGranule(MergeTreeIndexGranulePtr /*idx_granule*/) const diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp index 62bb31dff68..067a692a3b5 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp @@ -1,5 +1,6 @@ -#include #include +#include +#include namespace fs = std::filesystem; diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index b11cbf1e034..284d47ef9e7 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -44,10 +44,12 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet( const String & index_name_, const Block & index_sample_block_, size_t max_rows_, - MutableColumns && mutable_columns_) + MutableColumns && mutable_columns_, + std::vector && set_hyperrectangle_) : index_name(index_name_) , max_rows(max_rows_) , block(index_sample_block_.cloneWithColumns(std::move(mutable_columns_))) + , set_hyperrectangle(std::move(set_hyperrectangle_)) { } @@ -106,6 +108,10 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; }; settings.position_independent_encoding = false; + set_hyperrectangle.clear(); + Field min_val; + Field max_val; + for (size_t i = 0; i < num_columns; ++i) { auto & elem = block.getByPosition(i); @@ -116,6 +122,13 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd 
serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr); serialization->deserializeBinaryBulkWithMultipleStreams(elem.column, rows_to_read, settings, state, nullptr); + + if (const auto * column_nullable = typeid_cast(elem.column.get())) + column_nullable->getExtremesNullLast(min_val, max_val); + else + elem.column->getExtremes(min_val, max_val); + + set_hyperrectangle.emplace_back(min_val, true, max_val, true); } } @@ -182,10 +195,29 @@ void MergeTreeIndexAggregatorSet::update(const Block & block, size_t * pos, size if (has_new_data) { + FieldRef field_min; + FieldRef field_max; for (size_t i = 0; i < columns.size(); ++i) { auto filtered_column = block.getByName(index_columns[i]).column->filter(filter, block.rows()); columns[i]->insertRangeFrom(*filtered_column, 0, filtered_column->size()); + + if (const auto * column_nullable = typeid_cast(filtered_column.get())) + column_nullable->getExtremesNullLast(field_min, field_max); + else + filtered_column->getExtremes(field_min, field_max); + + if (set_hyperrectangle.size() <= i) + { + set_hyperrectangle.emplace_back(field_min, true, field_max, true); + } + else + { + set_hyperrectangle[i].left + = applyVisitor(FieldVisitorAccurateLess(), set_hyperrectangle[i].left, field_min) ? set_hyperrectangle[i].left : field_min; + set_hyperrectangle[i].right + = applyVisitor(FieldVisitorAccurateLess(), set_hyperrectangle[i].right, field_max) ? field_max : set_hyperrectangle[i].right; + } } } @@ -221,7 +253,7 @@ bool MergeTreeIndexAggregatorSet::buildFilter( MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset() { - auto granule = std::make_shared(index_name, index_sample_block, max_rows, std::move(columns)); + auto granule = std::make_shared(index_name, index_sample_block, max_rows, std::move(columns), std::move(set_hyperrectangle)); switch (data.type) { @@ -240,17 +272,22 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset() return granule; } +KeyCondition buildCondition(const IndexDescription & index, const ActionsDAGPtr & filter_actions_dag, ContextPtr context) +{ + return KeyCondition{filter_actions_dag, context, index.column_names, index.expression}; +} MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( - const String & index_name_, - const Block & index_sample_block, size_t max_rows_, const ActionsDAGPtr & filter_dag, - ContextPtr context) - : index_name(index_name_) + ContextPtr context, + const IndexDescription & index_description) + : index_name(index_description.name) , max_rows(max_rows_) + , index_data_types(index_description.data_types) + , condition(buildCondition(index_description, filter_dag, context)) { - for (const auto & name : index_sample_block.getNames()) + for (const auto & name : index_description.sample_block.getNames()) if (!key_columns.contains(name)) key_columns.insert(name); @@ -293,6 +330,9 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx if (size == 0 || (max_rows != 0 && size > max_rows)) return true; + if (!condition.checkInHyperrectangle(granule.set_hyperrectangle, index_data_types).can_be_true) + return false; + Block result = granule.block; actions->execute(result); @@ -546,7 +586,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexSet::createIndexAggregator(const Merge MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const { - return std::make_shared(index.name, index.sample_block, max_rows, filter_actions_dag, context); + return 
std::make_shared(max_rows, filter_actions_dag, context, index); } MergeTreeIndexPtr setIndexCreator(const IndexDescription & index) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index 6efc2effafd..168262360fc 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -22,7 +22,8 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule const String & index_name_, const Block & index_sample_block_, size_t max_rows_, - MutableColumns && columns_); + MutableColumns && columns_, + std::vector && set_hyperrectangle_); void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override; @@ -36,6 +37,7 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule const size_t max_rows; Block block; + std::vector set_hyperrectangle; }; @@ -73,6 +75,7 @@ private: ClearableSetVariants data; Sizes key_sizes; MutableColumns columns; + std::vector set_hyperrectangle; }; @@ -80,11 +83,10 @@ class MergeTreeIndexConditionSet final : public IMergeTreeIndexCondition { public: MergeTreeIndexConditionSet( - const String & index_name_, - const Block & index_sample_block, size_t max_rows_, const ActionsDAGPtr & filter_dag, - ContextPtr context); + ContextPtr context, + const IndexDescription & index_description); bool alwaysUnknownOrTrue() const override; @@ -119,6 +121,9 @@ private: std::unordered_set key_columns; ExpressionActionsPtr actions; String actions_output_column_name; + + DataTypes index_data_types; + KeyCondition condition; }; diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index 1db70162bff..9223d6fd5b1 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -1,5 +1,6 @@ -#include #include +#include +#include #include #include diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 2c249f7b63b..b6f626c992b 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace ProfileEvents diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index dc1ba030f45..1daf46b6e25 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -335,4 +336,12 @@ void MergeTreeReadPool::fillPerThreadInfo(size_t threads, size_t sum_marks) } } +MergeTreeReadPool::BackoffSettings::BackoffSettings(const DB::Settings& settings) + : min_read_latency_ms(settings.read_backoff_min_latency_ms.totalMilliseconds()), + max_throughput(settings.read_backoff_max_throughput), + min_interval_between_events_ms(settings.read_backoff_min_interval_between_events_ms.totalMilliseconds()), + min_events(settings.read_backoff_min_events), + min_concurrency(settings.read_backoff_min_concurrency) +{} + } diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index cb0e8a9657f..cd12b9db9fd 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -64,12 +64,7 @@ public: size_t min_concurrency = 1; /// Constants above is just an example. 
- explicit BackoffSettings(const Settings & settings) - : min_read_latency_ms(settings.read_backoff_min_latency_ms.totalMilliseconds()), - max_throughput(settings.read_backoff_max_throughput), - min_interval_between_events_ms(settings.read_backoff_min_interval_between_events_ms.totalMilliseconds()), - min_events(settings.read_backoff_min_events), - min_concurrency(settings.read_backoff_min_concurrency) {} + explicit BackoffSettings(const Settings & settings); BackoffSettings() : min_read_latency_ms(0) {} }; diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 78b67de1a7e..a6298aab3d9 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -145,8 +145,12 @@ ChunkAndProgress MergeTreeSelectProcessor::read() ordered_columns.push_back(res.block.getByName(name).column); } + auto chunk = Chunk(ordered_columns, res.row_count); + if (add_part_level) + chunk.getChunkInfos().add(std::make_shared(task->getInfo().data_part->info.level)); + return ChunkAndProgress{ - .chunk = Chunk(ordered_columns, res.row_count, add_part_level ? std::make_shared(task->getInfo().data_part->info.level) : nullptr), + .chunk = std::move(chunk), .num_read_rows = res.num_read_rows, .num_read_bytes = res.num_read_bytes, .is_finished = false}; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 02f8d6f4f6a..07476e8b2e9 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -264,7 +265,10 @@ try ++it; } - return Chunk(std::move(res_columns), rows_read, add_part_level ? std::make_shared(data_part->info.level) : nullptr); + auto result = Chunk(std::move(res_columns), rows_read); + if (add_part_level) + result.getChunkInfos().add(std::make_shared(data_part->info.level)); + return result; } } else diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 5d6f08d3c53..c968ad84936 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -115,6 +115,16 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def, ContextPtr conte #undef ADD_IF_ABSENT } +bool MergeTreeSettings::isReadonlySetting(const String & name) +{ + return name == "index_granularity" || name == "index_granularity_bytes" || name == "enable_mixed_granularity_parts"; +} + +bool MergeTreeSettings::isPartFormatSetting(const String & name) +{ + return name == "min_bytes_for_wide_part" || name == "min_rows_for_wide_part"; +} + void MergeTreeSettings::sanityCheck(size_t background_pool_tasks) const { if (number_of_free_entries_in_pool_to_execute_mutation > background_pool_tasks) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index c0afd781c7e..f5ada81cf55 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -268,17 +268,8 @@ struct MergeTreeSettings : public BaseSettings, public /// NOTE: will rewrite the AST to add immutable settings. 
void loadFromQuery(ASTStorage & storage_def, ContextPtr context, bool is_attach); - /// We check settings after storage creation - static bool isReadonlySetting(const String & name) - { - return name == "index_granularity" || name == "index_granularity_bytes" - || name == "enable_mixed_granularity_parts"; - } - - static bool isPartFormatSetting(const String & name) - { - return name == "min_bytes_for_wide_part" || name == "min_rows_for_wide_part"; - } + static bool isReadonlySetting(const String & name); + static bool isPartFormatSetting(const String & name); /// Check that the values are sane taking also query-level settings into account. void sanityCheck(size_t background_pool_tasks) const; @@ -295,4 +286,10 @@ namespace MergeTreeColumnSettings void validate(const SettingsChanges & changes); } +[[maybe_unused]] static bool needSyncPart(size_t input_rows, size_t input_bytes, const MergeTreeSettings & settings) +{ + return ( + (settings.min_rows_to_fsync_after_merge && input_rows >= settings.min_rows_to_fsync_after_merge) + || (settings.min_compressed_bytes_to_fsync_after_merge && input_bytes >= settings.min_compressed_bytes_to_fsync_after_merge)); +} } diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 05751e0fa6f..0ca4276b858 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,14 +1,22 @@ #include #include #include +#include #include #include +#include + namespace ProfileEvents { extern const Event DuplicatedInsertedBlocks; } +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace DB { @@ -58,12 +66,12 @@ void MergeTreeSink::onCancel() { } -void MergeTreeSink::consume(Chunk chunk) +void MergeTreeSink::consume(Chunk & chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); @@ -76,6 +84,18 @@ void MergeTreeSink::consume(Chunk chunk) size_t streams = 0; bool support_parallel_write = false; + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); + + const bool need_to_define_dedup_token = !token_info->isDefined(); + + String block_dedup_token; + if (token_info->isDefined()) + block_dedup_token = token_info->getToken(); + for (auto & current_block : part_blocks) { ProfileEvents::Counters part_counters; @@ -100,22 +120,16 @@ void MergeTreeSink::consume(Chunk chunk) if (!temp_part.part) continue; + if (need_to_define_dedup_token) + { + chassert(temp_part.part); + const auto hash_value = temp_part.part->getPartBlockIDHash(); + token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); + } + if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) support_parallel_write = true; - String block_dedup_token; - if (storage.getDeduplicationLog()) - { - const String & dedup_token = settings.insert_deduplication_token; - if (!dedup_token.empty()) - { - /// multiple blocks can be inserted within the same insert query - /// an ordinal number is added to dedup token to generate a distinctive block id for each block - block_dedup_token = fmt::format("{}_{}", 
dedup_token, chunk_dedup_seqnum); - ++chunk_dedup_seqnum; - } - } - size_t max_insert_delayed_streams_for_parallel_write; if (settings.max_insert_delayed_streams_for_parallel_write.changed) @@ -127,6 +141,7 @@ void MergeTreeSink::consume(Chunk chunk) /// In case of too much columns/parts in block, flush explicitly. streams += temp_part.streams.size(); + if (streams > max_insert_delayed_streams_for_parallel_write) { finishDelayedChunk(); @@ -143,11 +158,16 @@ void MergeTreeSink::consume(Chunk chunk) { .temp_part = std::move(temp_part), .elapsed_ns = elapsed_ns, - .block_dedup_token = std::move(block_dedup_token), + .block_dedup_token = block_dedup_token, .part_counters = std::move(part_counters), }); } + if (need_to_define_dedup_token) + { + token_info->finishChunkHashes(); + } + finishDelayedChunk(); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); @@ -160,6 +180,8 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; + const Settings & settings = context->getSettingsRef(); + for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -178,7 +200,8 @@ void MergeTreeSink::finishDelayedChunk() storage.fillNewPartName(part, lock); auto * deduplication_log = storage.getDeduplicationLog(); - if (deduplication_log) + + if (settings.insert_deduplicate && deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); auto res = deduplication_log->addPart(block_id, part->info); diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index cf6715a3415..90976020d52 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -25,7 +25,7 @@ public: ~MergeTreeSink() override; String getName() const override { return "MergeTreeSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onStart() override; void onFinish() override; void onCancel() override; @@ -36,7 +36,6 @@ private: size_t max_parts_per_block; ContextPtr context; StorageSnapshotPtr storage_snapshot; - UInt64 chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token UInt64 num_blocks_processed = 0; /// We can delay processing for previous chunk and start writing a new one. 
diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index a9a5fddace4..44ab12bde2a 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -232,7 +233,7 @@ static bool isConditionGood(const RPNBuilderTreeNode & condition, const NameSet else if (type == Field::Types::Float64) { const auto value = output_value.get(); - return value < threshold || threshold < value; + return value < -threshold || threshold < value; } return false; diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 9d696b70d9f..38869aebaa5 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 5ae6517a236..c167ac87317 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 4c96cbf2c97..73084f487b9 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 20f387137e7..9aec074deae 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a552ee89aee..9a775db73e2 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1,8 +1,10 @@ #include +#include #include #include #include +#include #include #include #include @@ -19,11 +21,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -1297,6 +1301,7 @@ void PartMergerWriter::prepare() bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Block cur_block; + Block projection_header; if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) @@ -1314,14 +1319,12 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - projection_squashes[i].header = block_to_squash; - Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); + projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); - if (planned_chunk.hasChunkInfo()) + Chunk squashed_chunk = Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); + if (squashed_chunk) { - Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); - - auto result = 
block_to_squash.cloneWithColumns(projection_chunk.getColumns()); + auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns()); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); @@ -1342,12 +1345,10 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squashes[i]; - auto planned_chunk = projection_squash_plan.flush(); - if (planned_chunk.hasChunkInfo()) + auto squashed_chunk = Squashing::squash(projection_squash_plan.flush()); + if (squashed_chunk) { - Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); - - auto result = projection_squash_plan.header.cloneWithColumns(projection_chunk.getColumns()); + auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 4228d7b70b6..10c7bef72fc 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index 4a18d606bb7..93087b0d3fe 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp index 2fc5238827d..24929365b72 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp @@ -1,8 +1,9 @@ #include -#include #include +#include #include +#include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index 336d19692d4..6e22a3515bc 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 328c03a5b94..7aef249c366 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -1,13 +1,14 @@ +#include +#include #include #include -#include -#include #include #include #include #include +#include namespace DB diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp index 192f0d23f96..3e64a4c7c52 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp @@ -1,6 +1,7 @@ +#include +#include #include #include -#include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 
d7601e6e638..dc242a7b084 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 30ba95c46f0..627bda3f8bf 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 35f355d1d9b..268cead2a0e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 4b4f4c33e7d..63e1c12a15f 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -3,20 +3,24 @@ #include #include #include +#include #include "Common/Exception.h" #include #include #include #include #include +#include #include #include +#include #include #include #include #include #include + namespace ProfileEvents { extern const Event DuplicatedInsertedBlocks; @@ -253,12 +257,12 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const } template -void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) +void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); const auto & settings = context->getSettingsRef(); @@ -284,13 +288,25 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) if constexpr (async_insert) { - const auto & chunk_info = chunk.getChunkInfo(); - if (const auto * async_insert_info_ptr = typeid_cast(chunk_info.get())) + const auto async_insert_info_ptr = chunk.getChunkInfos().get(); + if (async_insert_info_ptr) async_insert_info = std::make_shared(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens); else throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } + String block_dedup_token; + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); + + const bool need_to_define_dedup_token = !token_info->isDefined(); + + if (token_info->isDefined()) + block_dedup_token = token_info->getToken(); + auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; @@ -342,23 +358,10 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) } else { - if (deduplicate) { - String block_dedup_token; - /// We add the hash from the data and partition identifier to deduplication ID. /// That is, do not insert the same data to the same partition twice. 
- - const String & dedup_token = settings.insert_deduplication_token; - if (!dedup_token.empty()) - { - /// multiple blocks can be inserted within the same insert query - /// an ordinal number is added to dedup token to generate a distinctive block id for each block - block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); - ++chunk_dedup_seqnum; - } - block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token); LOG_DEBUG(log, "Wrote block with ID '{}', {} rows{}", block_id, current_block.block.rows(), quorumLogMessage(replicas_num)); } @@ -366,6 +369,13 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) { LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } + + if (need_to_define_dedup_token) + { + chassert(temp_part.part); + const auto hash_value = temp_part.part->getPartBlockIDHash(); + token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); + } } profile_events_scope.reset(); @@ -411,17 +421,15 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) )); } + if (need_to_define_dedup_token) + { + token_info->finishChunkHashes(); + } + finishDelayedChunk(zookeeper); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); - /// If deduplicated data should not be inserted into MV, we need to set proper - /// value for `last_block_is_duplicate`, which is possible only after the part is committed. - /// Othervide we can delay commit. - /// TODO: we can also delay commit if there is no MVs. - if (!settings.deduplicate_blocks_in_dependent_materialized_views) - finishDelayedChunk(zookeeper); - ++num_blocks_processed; } @@ -431,8 +439,6 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF if (!delayed_chunk) return; - last_block_is_duplicate = false; - for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -445,8 +451,6 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF { bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num).second; - last_block_is_duplicate = last_block_is_duplicate || deduplicated; - /// Set a special error code if the block is duplicate int error = (deduplicate && deduplicated) ? 
ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); @@ -535,7 +539,7 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl ProfileEventsScope profile_events_scope; String original_part_dir = part->getDataPartStorage().getPartDirectory(); - auto try_rollback_part_rename = [this, &part, &original_part_dir]() + auto try_rollback_part_rename = [this, &part, &original_part_dir] () { if (original_part_dir == part->getDataPartStorage().getPartDirectory()) return; @@ -1151,8 +1155,16 @@ void ReplicatedMergeTreeSinkImpl::onStart() template void ReplicatedMergeTreeSinkImpl::onFinish() { - auto zookeeper = storage.getZooKeeper(); - finishDelayedChunk(std::make_shared(zookeeper)); + const auto & settings = context->getSettingsRef(); + + ZooKeeperWithFaultInjectionPtr zookeeper = ZooKeeperWithFaultInjection::createInstance( + settings.insert_keeper_fault_injection_probability, + settings.insert_keeper_fault_injection_seed, + storage.getZooKeeper(), + "ReplicatedMergeTreeSink::onFinish", + log); + + finishDelayedChunk(zookeeper); } template diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 39623c20584..7d025361717 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -51,7 +51,7 @@ public: ~ReplicatedMergeTreeSinkImpl() override; void onStart() override; - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; String getName() const override { return "ReplicatedMergeTreeSink"; } @@ -59,16 +59,6 @@ public: /// For ATTACHing existing data on filesystem. bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); - /// For proper deduplication in MaterializedViews - bool lastBlockIsDuplicate() const override - { - /// If MV is responsible for deduplication, block is not considered duplicating. - if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) - return false; - - return last_block_is_duplicate; - } - struct DelayedChunk; private: std::vector detectConflictsInAsyncBlockIDs(const std::vector & ids); @@ -126,7 +116,6 @@ private: bool allow_attach_while_readonly = false; bool quorum_parallel = false; const bool deduplicate = true; - bool last_block_is_duplicate = false; UInt64 num_blocks_processed = 0; LoggerPtr log; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 287a4d20543..c4bae5352cb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index a94508ad41f..82ebbf09988 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -1,25 +1,17 @@ #pragma once +#include #include #include #include #include -#include -#include -#include -#include #include -#include -#include namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} +class QueryPlan; /// A Storage that allows reading from a single MergeTree data part. 
class StorageFromMergeTreeDataPart final : public IStorage @@ -46,20 +38,7 @@ public: String getName() const override { return "FromMergeTreeDataPart"; } - StorageSnapshotPtr getStorageSnapshot( - const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const override - { - const auto & storage_columns = metadata_snapshot->getColumns(); - if (!hasDynamicSubcolumns(storage_columns)) - return std::make_shared(*this, metadata_snapshot); - - auto data_parts = storage.getDataPartsVectorForInternalUsage(); - - auto object_columns = getConcreteObjectColumns( - data_parts.begin(), data_parts.end(), storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); }); - - return std::make_shared(*this, metadata_snapshot, std::move(object_columns)); - } + StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const override; void read( QueryPlan & query_plan, @@ -69,21 +48,7 @@ public: ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - size_t num_streams) override - { - query_plan.addStep(MergeTreeDataSelectExecutor(storage) - .readFromParts( - parts, - alter_conversions, - column_names, - storage_snapshot, - query_info, - context, - max_block_size, - num_streams, - nullptr, - analysis_result_ptr)); - } + size_t num_streams) override; bool supportsPrewhere() const override { return true; } @@ -102,12 +67,7 @@ public: return storage.getPartitionIDFromQuery(ast, context); } - bool materializeTTLRecalculateOnly() const - { - if (parts.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "parts must not be empty for materializeTTLRecalculateOnly"); - return parts.front()->storage.getSettings()->materialize_ttl_recalculate_only; - } + bool materializeTTLRecalculateOnly() const; bool hasLightweightDeletedMask() const override { diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 525960d5314..774fd95ebc6 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index d234103e52b..3f0603f6900 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -1,11 +1,14 @@ #include #include #include +#include #include #include #include #include +#include +#include #include #include #include diff --git a/src/Storages/MergeTree/tests/gtest_executor.cpp b/src/Storages/MergeTree/tests/gtest_executor.cpp index 6f34eb4dfbd..c7057ce87c6 100644 --- a/src/Storages/MergeTree/tests/gtest_executor.cpp +++ b/src/Storages/MergeTree/tests/gtest_executor.cpp @@ -34,7 +34,7 @@ public: auto choice = distribution(generator); if (choice == 0) - throw std::runtime_error("Unlucky..."); + throw TestException(); return false; } @@ -48,7 +48,7 @@ public: { auto choice = distribution(generator); if (choice == 0) - throw std::runtime_error("Unlucky..."); + throw TestException(); } Priority getPriority() const override { return {}; } diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 4fb81d69070..36899011e33 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -40,7 +40,7 @@ void MessageQueueSink::onFinish() producer->finish(); } -void MessageQueueSink::consume(Chunk chunk) +void 
MessageQueueSink::consume(Chunk & chunk) { const auto & columns = chunk.getColumns(); if (columns.empty()) diff --git a/src/Storages/MessageQueueSink.h b/src/Storages/MessageQueueSink.h index b3c1e61734f..4a9248c6c4d 100644 --- a/src/Storages/MessageQueueSink.h +++ b/src/Storages/MessageQueueSink.h @@ -35,7 +35,7 @@ public: String getName() const override { return storage_name + "Sink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onStart() override; void onFinish() override; diff --git a/src/Storages/MySQL/MySQLSettings.cpp b/src/Storages/MySQL/MySQLSettings.cpp index fd53174f4f6..1c88e4a94e9 100644 --- a/src/Storages/MySQL/MySQLSettings.cpp +++ b/src/Storages/MySQL/MySQLSettings.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 0b88a9e8929..8f0e2d76473 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -644,7 +644,13 @@ bool StorageNATS::streamToViews() insert->table_id = table_id; // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, nats_context, false, true, true); + InterpreterInsertQuery interpreter( + insert, + nats_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_insert */ false); auto block_io = interpreter.execute(); auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); diff --git a/src/Storages/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollectionsHelpers.cpp index ba90f21c907..b58aed573ae 100644 --- a/src/Storages/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollectionsHelpers.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -117,7 +118,7 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (asts.size() == 1) return collection_copy; - const auto allow_override_by_default = context->getSettings().allow_named_collection_override_by_default; + const auto allow_override_by_default = context->getSettingsRef().allow_named_collection_override_by_default; for (auto * it = std::next(asts.begin()); it != asts.end(); ++it) { @@ -162,7 +163,7 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); - const auto allow_override_by_default = context->getSettings().allow_named_collection_override_by_default; + const auto allow_override_by_default = context->getSettingsRef().allow_named_collection_override_by_default; for (const auto & key : keys) { if (collection_copy->isOverridable(key, allow_override_by_default)) diff --git a/src/Storages/NamedCollectionsHelpers.h b/src/Storages/NamedCollectionsHelpers.h index b4aea096c59..f444a581eb6 100644 --- a/src/Storages/NamedCollectionsHelpers.h +++ b/src/Storages/NamedCollectionsHelpers.h @@ -9,14 +9,14 @@ #include +namespace DB +{ + namespace ErrorCodes { extern const int BAD_ARGUMENTS; } -namespace DB -{ - /// Helper function to get named collection for table engine. /// Table engines have collection name as first argument of ast and other arguments are key-value overrides. 
MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp index 91cc02de0f0..f0a0a562b92 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp index 0484f86542c..6d18b13df01 100644 --- a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp @@ -3,7 +3,7 @@ #if USE_AWS_S3 && USE_AVRO #include - +#include #include #include #include diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 155f51adf61..e8071be6f02 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -2,6 +2,7 @@ #if USE_HDFS #include +#include #include #include #include diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 78cdc442f64..d739f7a28c9 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index b33d55105e9..094ca069e7a 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -1,6 +1,7 @@ #include #if USE_AWS_S3 +#include #include #include #include diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 683473006e3..fe675a17d54 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index c93a0bf6943..818ce055c77 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include #include diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index f2f6eac333c..c864fb9a7e2 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace DB @@ -39,12 +40,12 @@ StorageObjectStorageSink::StorageObjectStorageSink( configuration->format, *write_buf, sample_block, context, format_settings_); } -void StorageObjectStorageSink::consume(Chunk chunk) +void StorageObjectStorageSink::consume(Chunk & chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void StorageObjectStorageSink::onCancel() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index e0081193686..6ab531bb21a 100644 --- 
a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "StorageObjectStorageSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onCancel() override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index a9a7e062076..086482e330a 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace fs = std::filesystem; @@ -421,7 +422,7 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer( /// FIXME: Changing this setting to default value breaks something around parquet reading read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size; - const bool object_too_small = object_size <= 2 * context_->getSettings().max_download_buffer_size; + const bool object_too_small = object_size <= 2 * context_->getSettingsRef().max_download_buffer_size; const bool use_prefetch = object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; read_settings.remote_fs_method = use_prefetch ? RemoteFSReadMethod::threadpool : RemoteFSReadMethod::read; /// User's object may change, don't cache it. diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index bf595b2f5d4..b5f4cf5bb54 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -1,9 +1,10 @@ +#include +#include #include #include #include #include #include -#include namespace DB { diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h index 05060931b5a..e5fae047ac5 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h @@ -2,21 +2,19 @@ #include "config.h" #include -#include -#include #include -#include +#include #include #include #include #include +#include namespace fs = std::filesystem; namespace Poco { class Logger; } namespace DB { -struct ObjectStorageQueueSettings; class StorageObjectStorageQueue; struct ObjectStorageQueueTableMetadata; struct StorageInMemoryMetadata; diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 4388864434e..14b828e7268 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -454,7 +454,13 @@ bool StorageObjectStorageQueue::streamToViews() while (!shutdown_called && !file_iterator->isFinished()) { - InterpreterInsertQuery interpreter(insert, queue_context, false, true, true); + InterpreterInsertQuery interpreter( + insert, + queue_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_insert */ false); auto block_io = interpreter.execute(); auto read_from_format_info = prepareReadingFromFormat( block_io.pipeline.getHeader().getNames(), diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index 09b009b26d8..ee2570756ed 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -51,7 
+51,7 @@ SinkPtr PartitionedSink::getSinkForPartitionKey(StringRef partition_key) return it->second; } -void PartitionedSink::consume(Chunk chunk) +void PartitionedSink::consume(Chunk & chunk) { const auto & columns = chunk.getColumns(); @@ -104,7 +104,7 @@ void PartitionedSink::consume(Chunk chunk) for (const auto & [partition_key, partition_index] : partition_id_to_chunk_index) { auto sink = getSinkForPartitionKey(partition_key); - sink->consume(std::move(partition_index_to_chunk[partition_index])); + sink->consume(partition_index_to_chunk[partition_index]); } } diff --git a/src/Storages/PartitionedSink.h b/src/Storages/PartitionedSink.h index 68edeb6fd73..fcd67556dc9 100644 --- a/src/Storages/PartitionedSink.h +++ b/src/Storages/PartitionedSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "PartitionedSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onException(std::exception_ptr exception) override; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index ba3cc6f58d0..44479bd01e2 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -697,7 +697,13 @@ void MaterializedPostgreSQLConsumer::syncTables() insert->table_id = storage->getStorageID(); insert->columns = std::make_shared(buffer->columns_ast); - InterpreterInsertQuery interpreter(insert, insert_context, true); + InterpreterInsertQuery interpreter( + insert, + insert_context, + /* allow_materialized */ true, + /* no_squash */ false, + /* no_destination */ false, + /* async_insert */ false); auto io = interpreter.execute(); auto input = std::make_shared( result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 2bb1e2dde0d..f632e553a0d 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -437,7 +437,13 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection auto insert_context = materialized_storage->getNestedTableContext(); - InterpreterInsertQuery interpreter(insert, insert_context); + InterpreterInsertQuery interpreter( + insert, + insert_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_insert */ false); auto block_io = interpreter.execute(); const StorageInMemoryMetadata & storage_metadata = nested_storage->getInMemoryMetadata(); diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index b9edff39b82..a904b29e12f 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -21,7 +21,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index e4b19992151..f3d2aff68c8 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1129,7 +1129,13 @@ bool StorageRabbitMQ::tryStreamToViews() } // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* 
no_squash_ */ true, /* no_destination_ */ true); + InterpreterInsertQuery interpreter( + insert, + rabbitmq_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_insert */ false); auto block_io = interpreter.execute(); block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.cpp b/src/Storages/ReadFinalForExternalReplicaStorage.cpp index e1d52eefc20..5070b4a17d8 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.cpp +++ b/src/Storages/ReadFinalForExternalReplicaStorage.cpp @@ -2,6 +2,7 @@ #if USE_MYSQL || USE_LIBPQXX +#include #include #include #include diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index e0ef967d491..9c8c4c2fe79 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 90792c59d38..f6127f3ed8a 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -218,17 +219,18 @@ std::pair EmbeddedRocksDBBulkSink::seriali return {std::move(serialized_key_column), std::move(serialized_value_column)}; } -void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) +void EmbeddedRocksDBBulkSink::consume(Chunk & chunk_) { std::vector chunks_to_write = squash(std::move(chunk_)); if (chunks_to_write.empty()) return; + size_t num_chunks = chunks_to_write.size(); auto [serialized_key_column, serialized_value_column] = storage.ttl > 0 ? serializeChunks(std::move(chunks_to_write)) : serializeChunks(std::move(chunks_to_write)); auto sst_file_path = getTemporarySSTFilePath(); - LOG_DEBUG(getLogger("EmbeddedRocksDBBulkSink"), "Writing {} rows to SST file {}", serialized_key_column->size(), sst_file_path); + LOG_DEBUG(getLogger("EmbeddedRocksDBBulkSink"), "Writing {} rows from {} chunks to SST file {}", serialized_key_column->size(), num_chunks, sst_file_path); if (auto status = buildSSTFile(sst_file_path, *serialized_key_column, *serialized_value_column); !status.ok()) throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); @@ -247,7 +249,10 @@ void EmbeddedRocksDBBulkSink::onFinish() { /// If there is any data left, write it. 
if (!chunks.empty()) - consume({}); + { + Chunk empty; + consume(empty); + } } String EmbeddedRocksDBBulkSink::getTemporarySSTFilePath() diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h index 1f548e7813d..64190c8c86f 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -32,7 +32,7 @@ public: ~EmbeddedRocksDBBulkSink() override; - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index c451cfd1bf5..1f7f6939f40 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -29,7 +29,7 @@ EmbeddedRocksDBSink::EmbeddedRocksDBSink( serializations = getHeader().getSerializations(); } -void EmbeddedRocksDBSink::consume(Chunk chunk) +void EmbeddedRocksDBSink::consume(Chunk & chunk) { auto rows = chunk.getNumRows(); const auto & columns = chunk.getColumns(); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.h b/src/Storages/RocksDB/EmbeddedRocksDBSink.h index 011322df829..2e1e0c7b429 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.h @@ -17,7 +17,7 @@ public: StorageEmbeddedRocksDB & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; String getName() const override { return "EmbeddedRocksDBSink"; } private: diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index b9d3e071b6c..409703c84c6 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -313,7 +314,8 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt Block block; while (executor.pull(block)) { - sink->consume(Chunk{block.getColumns(), block.rows()}); + auto chunk = Chunk(block.getColumns(), block.rows()); + sink->consume(chunk); } } diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.cpp b/src/Storages/RocksDB/StorageSystemRocksDB.cpp index 5105b190fd9..744d43e1c77 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.cpp +++ b/src/Storages/RocksDB/StorageSystemRocksDB.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/SetSettings.cpp b/src/Storages/SetSettings.cpp index baa3d231067..4e6dd6a0519 100644 --- a/src/Storages/SetSettings.cpp +++ b/src/Storages/SetSettings.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index a3f6b6afc5d..1bcc602f168 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -38,6 +38,7 @@ #include #include #include +#include namespace ProfileEvents @@ -607,7 +608,7 @@ public: String getName() const override { return "BufferSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { size_t rows = chunk.getNumRows(); if (!rows) @@ -1020,7 +1021,13 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl auto insert_context = Context::createCopy(getContext()); insert_context->makeQueryContext(); - InterpreterInsertQuery interpreter{insert, insert_context, allow_materialized}; + 
InterpreterInsertQuery interpreter( + insert, + insert_context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_insert */ false); auto block_io = interpreter.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index 447fd87cdc9..b43cc4fa426 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -9,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -25,13 +27,14 @@ namespace ErrorCodes extern const int CANNOT_DETACH_DICTIONARY_AS_TABLE; extern const int DICTIONARY_ALREADY_EXISTS; extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; } namespace { void checkNamesAndTypesCompatibleWithDictionary(const String & dictionary_name, const ColumnsDescription & columns, const DictionaryStructure & dictionary_structure) { - auto dictionary_names_and_types = StorageDictionary::getNamesAndTypes(dictionary_structure); + auto dictionary_names_and_types = StorageDictionary::getNamesAndTypes(dictionary_structure, false); std::set names_and_types_set(dictionary_names_and_types.begin(), dictionary_names_and_types.end()); for (const auto & column : columns.getOrdinary()) @@ -47,13 +50,17 @@ namespace } -NamesAndTypesList StorageDictionary::getNamesAndTypes(const DictionaryStructure & dictionary_structure) +NamesAndTypesList StorageDictionary::getNamesAndTypes(const DictionaryStructure & dictionary_structure, bool validate_id_type) { NamesAndTypesList dictionary_names_and_types; if (dictionary_structure.id) - dictionary_names_and_types.emplace_back(dictionary_structure.id->name, std::make_shared()); + { + if (validate_id_type && dictionary_structure.id->type->getTypeId() != TypeIndex::UInt64) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect type of ID column: must be UInt64, but it is {}", dictionary_structure.id->type->getFamilyName()); + dictionary_names_and_types.emplace_back(dictionary_structure.id->name, std::make_shared()); + } /// In old-style (XML) configuration we don't have this attributes in the /// main attribute list, so we have to add them to columns list explicitly. /// In the new configuration (DDL) we have them both in range_* nodes and @@ -105,7 +112,7 @@ StorageDictionary::StorageDictionary( Location location_, ContextPtr context_) : StorageDictionary( - table_id_, dictionary_name_, ColumnsDescription{getNamesAndTypes(dictionary_structure_)}, comment, location_, context_) + table_id_, dictionary_name_, ColumnsDescription{getNamesAndTypes(dictionary_structure_, context_->getSettingsRef().dictionary_validate_primary_key_type)}, comment, location_, context_) { } @@ -162,6 +169,7 @@ Pipe StorageDictionary::read( { auto registered_dictionary_name = location == Location::SameDatabaseAndNameAsDictionary ? 
getStorageID().getInternalDictionaryName() : dictionary_name; auto dictionary = getContext()->getExternalDictionariesLoader().getDictionary(registered_dictionary_name, local_context); + local_context->checkAccess(AccessType::dictGet, dictionary->getDatabaseOrNoDatabaseTag(), dictionary->getDictionaryID().getTableName()); return dictionary->read(column_names, max_block_size, threads); } diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index 17e4efda2cd..44a274cf97c 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -80,7 +80,7 @@ public: std::shared_ptr getDictionary() const; - static NamesAndTypesList getNamesAndTypes(const DictionaryStructure & dictionary_structure); + static NamesAndTypesList getNamesAndTypes(const DictionaryStructure & dictionary_structure, bool validate_id_type); bool isDictionary() const override { return true; } void shutdown(bool is_drop) override; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 2cf3ced2904..65323b4bb52 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1033,7 +1033,13 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter(new_query, query_context); + InterpreterInsertQuery interpreter( + new_query, + query_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_insert */ false); pipeline.addCompletedPipeline(interpreter.execute().pipeline); } else diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index e475211deb3..381c20c616d 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -147,7 +148,7 @@ void StorageExecutable::read( for (auto & input_query : input_queries) { QueryPipelineBuilder builder; - if (context->getSettings().allow_experimental_analyzer) + if (context->getSettingsRef().allow_experimental_analyzer) builder = InterpreterSelectQueryAnalyzer(input_query, context, {}).buildQueryPipeline(); else builder = InterpreterSelectWithUnionQuery(input_query, context, {}).buildQueryPipeline(); @@ -225,7 +226,7 @@ void registerStorageExecutable(StorageFactory & factory) { size_t max_command_execution_time = 10; - size_t max_execution_time_seconds = static_cast(args.getContext()->getSettings().max_execution_time.totalSeconds()); + size_t max_execution_time_seconds = static_cast(args.getContext()->getSettingsRef().max_execution_time.totalSeconds()); if (max_execution_time_seconds != 0 && max_command_execution_time > max_execution_time_seconds) max_command_execution_time = max_execution_time_seconds; diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp index d712bd10da4..951c87807bb 100644 --- a/src/Storages/StorageExternalDistributed.cpp +++ b/src/Storages/StorageExternalDistributed.cpp @@ -1,6 +1,6 @@ #include "StorageExternalDistributed.h" - +#include #include #include #include diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp index 9d12a1569d8..060b271d8f4 100644 --- a/src/Storages/StorageFactory.cpp +++ b/src/Storages/StorageFactory.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 
8797f6a3dfa..6c31a8a3a71 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -53,6 +53,8 @@ #include #include +#include + #include #include @@ -1787,12 +1789,12 @@ public: String getName() const override { return "StorageFileSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { std::lock_guard cancel_lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onCancel() override diff --git a/src/Storages/StorageFromMergeTreeDataPart.cpp b/src/Storages/StorageFromMergeTreeDataPart.cpp new file mode 100644 index 00000000000..481d9e66901 --- /dev/null +++ b/src/Storages/StorageFromMergeTreeDataPart.cpp @@ -0,0 +1,59 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +bool StorageFromMergeTreeDataPart::materializeTTLRecalculateOnly() const +{ + if (parts.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "parts must not be empty for materializeTTLRecalculateOnly"); + return parts.front()->storage.getSettings()->materialize_ttl_recalculate_only; +} + +void StorageFromMergeTreeDataPart::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t num_streams) +{ + query_plan.addStep(MergeTreeDataSelectExecutor(storage).readFromParts( + parts, + alter_conversions, + column_names, + storage_snapshot, + query_info, + context, + max_block_size, + num_streams, + nullptr, + analysis_result_ptr)); +} + +StorageSnapshotPtr +StorageFromMergeTreeDataPart::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const +{ + const auto & storage_columns = metadata_snapshot->getColumns(); + if (!hasDynamicSubcolumns(storage_columns)) + return std::make_shared(*this, metadata_snapshot); + + auto data_parts = storage.getDataPartsVectorForInternalUsage(); + + auto object_columns = getConcreteObjectColumns( + data_parts.begin(), data_parts.end(), storage_columns, [](const auto & part) -> const auto & { return part->getColumns(); }); + + return std::make_shared(*this, metadata_snapshot, std::move(object_columns)); +} + +} diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 754bc096958..f3eb88f3207 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -30,6 +30,7 @@ #include #include +#include #include #include diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index a5bae0acce5..2226de3e64f 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include #include #include diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 47e41cccc96..a0d6cf11b64 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 20f99070000..b32a2d302a7 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -36,6 +37,7 @@ #include 
#include +#include #include #include #include @@ -63,6 +65,11 @@ namespace DB { +namespace FailPoints +{ + extern const char keepermap_fail_drop_data[]; +} + namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -119,10 +126,10 @@ public: std::string getName() const override { return "StorageKeeperMapSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -410,18 +417,16 @@ StorageKeeperMap::StorageKeeperMap( auto code = client->tryCreate(zk_table_path, "", zkutil::CreateMode::Persistent); - // tables_path was removed with drop - if (code == Coordination::Error::ZNONODE) - { - LOG_INFO(log, "Metadata nodes were removed by another server, will retry"); - continue; - } - else if (code != Coordination::Error::ZOK) - { - throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", zk_root_path); - } + /// A table on the same Keeper path already exists, we just appended our table id to subscribe as a new replica + /// We still don't know if the table matches the expected metadata so table_is_valid is not changed + /// It will be checked lazily on the first operation + if (code == Coordination::Error::ZOK) + return; - return; + if (code != Coordination::Error::ZNONODE) + throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", zk_root_path); + + /// ZNONODE means we dropped zk_tables_path but didn't finish drop completely } if (client->exists(zk_dropped_path)) @@ -472,6 +477,7 @@ StorageKeeperMap::StorageKeeperMap( table_is_valid = true; + /// we are the first table created for the specified Keeper path, i.e. 
we are the first replica return; } @@ -560,6 +566,10 @@ void StorageKeeperMap::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock) { + fiu_do_on(FailPoints::keepermap_fail_drop_data, + { + throw zkutil::KeeperException(Coordination::Error::ZOPERATIONTIMEOUT, "Manually triggered operation timeout"); + }); zookeeper->removeChildrenRecursive(zk_data_path); bool completely_removed = false; @@ -1248,7 +1258,10 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca Block block; while (executor.pull(block)) - sink->consume(Chunk{block.getColumns(), block.rows()}); + { + auto chunk = Chunk(block.getColumns(), block.rows()); + sink->consume(chunk); + } sink->finalize(strict); } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index de0324d7998..303532dfeca 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -1,9 +1,11 @@ #include #include +#include #include #include #include +#include #include @@ -21,7 +23,6 @@ #include #include -#include "StorageLogSettings.h" #include #include #include @@ -341,7 +342,7 @@ public: } } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: @@ -398,9 +399,9 @@ private: }; -void LogSink::consume(Chunk chunk) +void LogSink::consume(Chunk & chunk) { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); metadata_snapshot->check(block, true); for (auto & stream : streams | boost::adaptors::map_values) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index ec1559b71a4..b603d0ecf87 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -92,11 +93,6 @@ StorageMaterializedView::StorageMaterializedView( { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); - auto * storage_def = query.storage; - if (storage_def && storage_def->primary_key) - storage_metadata.primary_key = KeyDescription::getKeyFromAST(storage_def->primary_key->ptr(), - storage_metadata.columns, - local_context->getGlobalContext()); if (query.sql_security) storage_metadata.setSQLSecurity(query.sql_security->as()); @@ -109,12 +105,21 @@ StorageMaterializedView::StorageMaterializedView( throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); /// If the destination table is not set, use inner table - has_inner_table = query.to_table_id.empty(); - if (has_inner_table && !query.storage) + auto to_table_id = query.getTargetTableID(ViewTarget::To); + has_inner_table = to_table_id.empty(); + auto to_inner_uuid = query.getTargetInnerUUID(ViewTarget::To); + auto to_table_engine = query.getTargetInnerEngine(ViewTarget::To); + + if (has_inner_table && !to_table_engine) throw Exception(ErrorCodes::INCORRECT_QUERY, "You must specify where to save results of a MaterializedView query: " "either ENGINE or an existing table in a TO clause"); + if (to_table_engine && to_table_engine->primary_key) + storage_metadata.primary_key = KeyDescription::getKeyFromAST(to_table_engine->primary_key->ptr(), + storage_metadata.columns, + local_context->getGlobalContext()); + auto select = 
SelectQueryDescription::getSelectQueryFromASTForMatView(query.select->clone(), query.refresh_strategy != nullptr, local_context); if (select.select_table_id) { @@ -134,25 +139,25 @@ StorageMaterializedView::StorageMaterializedView( setInMemoryMetadata(storage_metadata); - bool point_to_itself_by_uuid = has_inner_table && query.to_inner_uuid != UUIDHelpers::Nil - && query.to_inner_uuid == table_id_.uuid; - bool point_to_itself_by_name = !has_inner_table && query.to_table_id.database_name == table_id_.database_name - && query.to_table_id.table_name == table_id_.table_name; + bool point_to_itself_by_uuid = has_inner_table && to_inner_uuid != UUIDHelpers::Nil + && to_inner_uuid == table_id_.uuid; + bool point_to_itself_by_name = !has_inner_table && to_table_id.database_name == table_id_.database_name + && to_table_id.table_name == table_id_.table_name; if (point_to_itself_by_uuid || point_to_itself_by_name) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Materialized view {} cannot point to itself", table_id_.getFullTableName()); if (!has_inner_table) { - target_table_id = query.to_table_id; + target_table_id = to_table_id; } else if (LoadingStrictnessLevel::ATTACH <= mode) { /// If there is an ATTACH request, then the internal table must already be created. - target_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID()), query.to_inner_uuid); + target_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID()), to_inner_uuid); } else { - const String & engine = query.storage->engine->name; + const String & engine = to_table_engine->engine->name; const auto & storage_features = StorageFactory::instance().getStorageFeatures(engine); /// We will create a query to create an internal table. @@ -160,8 +165,8 @@ StorageMaterializedView::StorageMaterializedView( auto manual_create_query = std::make_shared(); manual_create_query->setDatabase(getStorageID().database_name); manual_create_query->setTable(generateInnerTableName(getStorageID())); - manual_create_query->uuid = query.to_inner_uuid; - manual_create_query->has_uuid = query.to_inner_uuid != UUIDHelpers::Nil; + manual_create_query->uuid = to_inner_uuid; + manual_create_query->has_uuid = to_inner_uuid != UUIDHelpers::Nil; auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); @@ -183,7 +188,9 @@ StorageMaterializedView::StorageMaterializedView( } manual_create_query->set(manual_create_query->columns_list, new_columns_list); - manual_create_query->set(manual_create_query->storage, query.storage->ptr()); + + if (to_table_engine) + manual_create_query->set(manual_create_query->storage, to_table_engine); InterpreterCreateQuery create_interpreter(manual_create_query, create_context); create_interpreter.setInternal(true); diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 57da72d06ed..42bac783618 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -1,5 +1,5 @@ -#include #include +#include #include #include @@ -37,7 +37,6 @@ #include #include - namespace DB { @@ -63,7 +62,7 @@ public: String getName() const override { return "MemorySink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); storage_snapshot->metadata->check(block, true); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 2dbe82c92d8..f5bc183931f 100644 --- 
a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 611289ffd78..444b3fbae4c 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -5,47 +5,39 @@ #include #include +#include #include #include #include #include #include -#include #include #include #include #include #include -#include #include -#include #include -#include #include #include #include #include -#include #include #include #include #include #include #include +#include #include -#include #include #include #include -#include #include -#include #include #include #include -#include #include -#include namespace DB @@ -656,12 +648,13 @@ void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr quer { /// It's important to serialize order of mutations with alter queries because /// they can depend on each other. - if (auto alter_lock = tryLockForAlter(query_context->getSettings().lock_acquire_timeout); alter_lock == std::nullopt) + if (auto alter_lock = tryLockForAlter(query_context->getSettingsRef().lock_acquire_timeout); alter_lock == std::nullopt) { - throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, - "Cannot start mutation in {}ms because some metadata-changing ALTER (MODIFY|RENAME|ADD|DROP) is currently executing. " - "You can change this timeout with `lock_acquire_timeout` setting", - query_context->getSettings().lock_acquire_timeout.totalMilliseconds()); + throw Exception( + ErrorCodes::TIMEOUT_EXCEEDED, + "Cannot start mutation in {}ms because some metadata-changing ALTER (MODIFY|RENAME|ADD|DROP) is currently executing. " + "You can change this timeout with `lock_acquire_timeout` setting", + query_context->getSettingsRef().lock_acquire_timeout.totalMilliseconds()); } version = startMutation(commands, query_context); } @@ -1194,7 +1187,6 @@ bool StorageMergeTree::merge( task->setCurrentTransaction(MergeTreeTransactionHolder{}, MergeTreeTransactionPtr{txn}); executeHere(task); - return true; } @@ -1490,7 +1482,6 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign scheduled = true; } - return scheduled; } @@ -1597,9 +1588,7 @@ bool StorageMergeTree::optimize( if (!partition && final) { if (cleanup && this->merging_params.mode != MergingParams::Mode::Replacing) - { throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, "Cannot OPTIMIZE with CLEANUP table: only ReplacingMergeTree can be CLEANUP"); - } if (cleanup && !getSettings()->allow_experimental_replacing_merge_with_cleanup) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental merges with CLEANUP are not allowed"); @@ -1613,15 +1602,15 @@ bool StorageMergeTree::optimize( for (const String & partition_id : partition_ids) { if (!merge( - true, - partition_id, - true, - deduplicate, - deduplicate_by_columns, - cleanup, - txn, - disable_reason, - local_context->getSettingsRef().optimize_skip_merged_partitions)) + true, + partition_id, + true, + deduplicate, + deduplicate_by_columns, + cleanup, + txn, + disable_reason, + local_context->getSettingsRef().optimize_skip_merged_partitions)) { constexpr auto message = "Cannot OPTIMIZE table: {}"; if (disable_reason.text.empty()) @@ -1641,15 +1630,15 @@ bool StorageMergeTree::optimize( partition_id = getPartitionIDFromQuery(partition, local_context); if (!merge( - true, - partition_id, - final, - deduplicate, - deduplicate_by_columns, - cleanup, - txn, - 
disable_reason, - local_context->getSettingsRef().optimize_skip_merged_partitions)) + true, + partition_id, + final, + deduplicate, + deduplicate_by_columns, + cleanup, + txn, + disable_reason, + local_context->getSettingsRef().optimize_skip_merged_partitions)) { constexpr auto message = "Cannot OPTIMIZE table: {}"; if (disable_reason.text.empty()) diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 4d819508934..064b51739bd 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -211,7 +211,6 @@ private: bool optimize_skip_merged_partitions = false, SelectPartsDecision * select_decision_out = nullptr); - MergeMutateSelectedEntryPtr selectPartsToMutate( const StorageMetadataPtr & metadata_snapshot, PreformattedMessage & disable_reason, TableLockHolder & table_lock_holder, std::unique_lock & currently_processing_in_background_mutex_lock); @@ -310,6 +309,7 @@ private: }; protected: + /// Collect mutations that have to be applied on the fly: currently they are only RENAME COLUMN. MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const override; }; diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 62a2a048642..e0818fafae9 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include @@ -107,12 +106,12 @@ public: String getName() const override { return "StorageMongoDBSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { Poco::MongoDB::Database db(db_name); Poco::MongoDB::Document::Vector documents; - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t num_rows = block.rows(); size_t num_cols = block.columns(); diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index da391909dff..1d1a0ffdeaf 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -151,9 +152,9 @@ public: String getName() const override { return "StorageMySQLSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); auto blocks = splitBlocks(block, max_batch_rows); mysqlxx::Transaction trans(entry); try diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index b5a388e8159..d3214e7ed13 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -227,9 +227,9 @@ public: String getName() const override { return "PostgreSQLSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); if (!inserter) { if (on_conflict.empty()) diff --git a/src/Storages/StorageRedis.cpp b/src/Storages/StorageRedis.cpp index 83bb3c606c9..1a275320f43 100644 --- a/src/Storages/StorageRedis.cpp +++ b/src/Storages/StorageRedis.cpp @@ -147,7 +147,7 @@ class RedisSink : public SinkToStorage public: RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; String getName() const override { return "RedisSink"; } private: @@ -169,10 +169,10 @@ 
RedisSink::RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadat } } -void RedisSink::consume(Chunk chunk) +void RedisSink::consume(Chunk & chunk) { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -567,7 +567,8 @@ void StorageRedis::mutate(const MutationCommands & commands, ContextPtr context_ Block block; while (executor.pull(block)) { - sink->consume(Chunk{block.getColumns(), block.rows()}); + Chunk chunk(block.getColumns(), block.rows()); + sink->consume(chunk); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 9fb0f082c6c..3f02486ed15 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -21,7 +21,9 @@ #include #include +#include #include +#include #include @@ -40,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -1513,7 +1516,7 @@ static time_t tryGetPartCreateTime(zkutil::ZooKeeperPtr & zookeeper, const Strin void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart(const Strings & parts_in_zk, const Strings & parts_to_fetch) const { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD constexpr bool paranoid_check_for_covered_parts_default = true; #else constexpr bool paranoid_check_for_covered_parts_default = false; @@ -2380,7 +2383,7 @@ static void paranoidCheckForCoveredPartsInZooKeeper( const String & covering_part_name, const StorageReplicatedMergeTree & storage) { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD constexpr bool paranoid_check_for_covered_parts_default = true; #else constexpr bool paranoid_check_for_covered_parts_default = false; @@ -7360,7 +7363,7 @@ void StorageReplicatedMergeTree::fetchPartition( if (try_no) LOG_INFO(log, "Some of parts ({}) are missing. 
Will try to fetch covering parts.", missing_parts.size()); - if (try_no >= query_context->getSettings().max_fetch_partition_retries_count) + if (try_no >= query_context->getSettingsRef().max_fetch_partition_retries_count) throw Exception(ErrorCodes::TOO_MANY_RETRIES_TO_FETCH_PARTS, "Too many retries to fetch parts from {}:{}", from_zookeeper_name, best_replica_path); diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 179e4cee199..85417a2f2a4 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -141,7 +141,7 @@ public: String getName() const override { return "SQLiteSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString sqlbuf; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 5b7f9fc0ac2..5692da4e454 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -44,7 +45,7 @@ public: const String & backup_file_name_, bool persistent_); String getName() const override { return "SetOrJoinSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: @@ -82,9 +83,9 @@ SetOrJoinSink::SetOrJoinSink( { } -void SetOrJoinSink::consume(Chunk chunk) +void SetOrJoinSink::consume(Chunk & chunk) { - Block block = getHeader().cloneWithColumns(chunk.detachColumns()); + Block block = getHeader().cloneWithColumns(chunk.getColumns()); table.insertBlock(block, getContext()); if (persistent) diff --git a/src/Storages/StorageSet.h b/src/Storages/StorageSet.h index 67a9528ff5e..5b6d899b48d 100644 --- a/src/Storages/StorageSet.h +++ b/src/Storages/StorageSet.h @@ -2,7 +2,6 @@ #include #include -#include namespace DB diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 43560a6d95d..c892cca4523 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -7,6 +7,8 @@ #include #include +#include + #include #include #include @@ -226,9 +228,9 @@ public: } } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - block_out->write(getHeader().cloneWithColumns(chunk.detachColumns())); + block_out->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onFinish() override @@ -289,7 +291,7 @@ StorageStripeLog::StorageStripeLog( , data_file_path(table_path + "data.bin") , index_file_path(table_path + "index.mrk") , file_checker(disk, table_path + "sizes.json") - , max_compress_block_size(context_->getSettings().max_compress_block_size) + , max_compress_block_size(context_->getSettingsRef().max_compress_block_size) , log(getLogger("StorageStripeLog")) { StorageInMemoryMetadata storage_metadata; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 895da028fc2..fd7c3976803 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -565,12 +567,12 @@ StorageURLSink::StorageURLSink( } -void StorageURLSink::consume(Chunk chunk) +void StorageURLSink::consume(Chunk & chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void StorageURLSink::onCancel() diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 
fa7cc6eeeef..1804079e75f 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -251,7 +251,7 @@ public: const String & method = Poco::Net::HTTPRequest::HTTP_POST); std::string getName() const override { return "StorageURLSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onCancel() override; void onException(std::exception_ptr exception) override; void onFinish() override; diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index 2e7c63d0097..69041482fd8 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 016de94c17c..5f768bce978 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -19,6 +19,8 @@ #include +#include + #include #include #include diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index fb8fa2d6da4..fa23a574ade 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -4,6 +4,8 @@ #include #include +#include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 8dd8d3b6154..6a7810b97f9 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 9201eef185f..35b9c0008c6 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -1,5 +1,6 @@ // autogenerated by tests/ci/version_helper.py const char * auto_contributors[] { + "0x01f", "0xflotus", "13DaGGeR", "1lann", @@ -167,6 +168,7 @@ const char * auto_contributors[] { "AnneClickHouse", "Anselmo D. Adams", "Anthony N. 
Simon", + "AntiTopQuark", "Anton Ivashkin", "Anton Kobzev", "Anton Kozlov", @@ -299,6 +301,7 @@ const char * auto_contributors[] { "Dan Wu", "DanRoscigno", "Dani Pozo", + "Daniel Anugerah", "Daniel Bershatsky", "Daniel Byta", "Daniel Dao", @@ -370,6 +373,7 @@ const char * auto_contributors[] { "Elena", "Elena Baskakova", "Elena Torró", + "Elena Torró Martínez", "Elghazal Ahmed", "Eliot Hautefeuille", "Elizaveta Mironyuk", @@ -415,6 +419,7 @@ const char * auto_contributors[] { "FgoDt", "Filatenkov Artur", "Filipe Caixeta", + "Filipp Bakanov", "Filipp Ozinov", "Filippov Denis", "Fille", @@ -451,6 +456,7 @@ const char * auto_contributors[] { "Gleb Novikov", "Gleb-Tretyakov", "GoGoWen2021", + "Gosha Letov", "Gregory", "Grigorii Sokolik", "Grigory", @@ -461,6 +467,7 @@ const char * auto_contributors[] { "Guillaume Tassery", "Guo Wangyang", "Guo Wei (William)", + "Guspan Tanadi", "Haavard Kvaalen", "Habibullah Oladepo", "HaiBo Li", @@ -474,6 +481,7 @@ const char * auto_contributors[] { "HarryLeeIBM", "Hasitha Kanchana", "Hasnat", + "Haydn", "Heena Bansal", "HeenaBansal2009", "Hendrik M", @@ -606,6 +614,7 @@ const char * auto_contributors[] { "Kevin Chiang", "Kevin Michel", "Kevin Mingtarja", + "Kevin Song", "Kevin Zhang", "KevinyhZou", "KinderRiven", @@ -661,6 +670,7 @@ const char * auto_contributors[] { "Lewinma", "Li Shuai", "Li Yin", + "Linh Giang", "Lino Uruñuela", "Lirikl", "Liu Cong", @@ -690,6 +700,7 @@ const char * auto_contributors[] { "Maksim Alekseev", "Maksim Buren", "Maksim Fedotov", + "Maksim Galkin", "Maksim Kita", "Maksym Sobolyev", "Mal Curtis", @@ -724,6 +735,7 @@ const char * auto_contributors[] { "Max Akhmedov", "Max Bruce", "Max K", + "Max K.", "Max Kainov", "Max Vetrov", "MaxTheHuman", @@ -811,6 +823,7 @@ const char * auto_contributors[] { "Nataly Merezhuk", "Natalya Chizhonkova", "Natasha Murashkina", + "Nathan Clevenger", "NeZeD [Mac Pro]", "Neeke Gao", "Neng Liu", @@ -946,6 +959,7 @@ const char * auto_contributors[] { "Robert Coelho", "Robert Hodges", "Robert Schulze", + "Rodolphe Dugé de Bernonville", "RogerYK", "Rohit Agarwal", "Romain Neutron", @@ -1107,6 +1121,7 @@ const char * auto_contributors[] { "Timur Solodovnikov", "TiunovNN", "Tobias Adamson", + "Tobias Florek", "Tobias Lins", "Tom Bombadil", "Tom Risse", @@ -1231,11 +1246,13 @@ const char * auto_contributors[] { "Yingchun Lai", "Yingfan Chen", "Yinzheng-Sun", + "Yinzuo Jiang", "Yiğit Konur", "Yohann Jardin", "Yong Wang", "Yong-Hao Zou", "Youenn Lebras", + "Your Name", "Yu, Peng", "Yuko Takagi", "Yuntao Wu", @@ -1250,6 +1267,7 @@ const char * auto_contributors[] { "Yury Stankevich", "Yusuke Tanaka", "Zach Naimon", + "Zawa-II", "Zheng Miao", "ZhiHong Zhang", "ZhiYong Wang", @@ -1380,6 +1398,7 @@ const char * auto_contributors[] { "conicliu", "copperybean", "coraxster", + "cw5121", "cwkyaoyao", "d.v.semenov", "dalei2019", @@ -1460,12 +1479,14 @@ const char * auto_contributors[] { "fuzzERot", "fyu", "g-arslan", + "gabrielmcg44", "ggerogery", "giordyb", "glockbender", "glushkovds", "grantovsky", "gulige", + "gun9nir", "guoleiyi", "guomaolin", "guov100", @@ -1527,6 +1548,7 @@ const char * auto_contributors[] { "jferroal", "jiahui-97", "jianmei zhang", + "jiaosenvip", "jinjunzh", "jiyoungyoooo", "jktng", @@ -1541,6 +1563,7 @@ const char * auto_contributors[] { "jun won", "jus1096", "justindeguzman", + "jwoodhead", "jyz0309", "karnevil13", "kashwy", @@ -1633,10 +1656,12 @@ const char * auto_contributors[] { "mateng0915", "mateng915", "mauidude", + "max-vostrikov", "maxim", "maxim-babenko", "maxkuzn", "maxulan", + 
"maxvostrikov", "mayamika", "mehanizm", "melin", @@ -1677,6 +1702,7 @@ const char * auto_contributors[] { "nathanbegbie", "nauta", "nautaa", + "nauu", "ndchikin", "nellicus", "nemonlou", @@ -1975,6 +2001,7 @@ const char * auto_contributors[] { "张健", "张风啸", "徐炘", + "忒休斯~Theseus", "曲正鹏", "木木夕120", "未来星___费", diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp index f96b839a322..ae1a233ea81 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp +++ b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index fbc99ab865e..7e4c1de1c65 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemDetachedTables.cpp b/src/Storages/System/StorageSystemDetachedTables.cpp new file mode 100644 index 00000000000..56c5e49b467 --- /dev/null +++ b/src/Storages/System/StorageSystemDetachedTables.cpp @@ -0,0 +1,241 @@ +#include "StorageSystemDetachedTables.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace +{ + +class DetachedTablesBlockSource : public ISource +{ +public: + DetachedTablesBlockSource( + std::vector columns_mask_, + Block header_, + UInt64 max_block_size_, + ColumnPtr databases_, + ColumnPtr detached_tables_, + ContextPtr context_) + : ISource(std::move(header_)) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + , databases(std::move(databases_)) + , context(Context::createCopy(context_)) + { + size_t size = detached_tables_->size(); + detached_tables.reserve(size); + for (size_t idx = 0; idx < size; ++idx) + { + detached_tables.insert(detached_tables_->getDataAt(idx).toString()); + } + } + + String getName() const override { return "DetachedTables"; } + +protected: + Chunk generate() override + { + if (done) + return {}; + + MutableColumns result_columns = getPort().getHeader().cloneEmptyColumns(); + + const auto access = context->getAccess(); + const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); + + size_t database_idx = 0; + size_t rows_count = 0; + for (; database_idx < databases->size() && rows_count < max_block_size; ++database_idx) + { + database_name = databases->getDataAt(database_idx).toString(); + database = DatabaseCatalog::instance().tryGetDatabase(database_name); + + if (!database) + continue; + + const bool need_to_check_access_for_tables + = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); + + if (!detached_tables_it || !detached_tables_it->isValid()) + detached_tables_it = database->getDetachedTablesIterator(context, {}, false); + + for (; rows_count < max_block_size && detached_tables_it->isValid(); detached_tables_it->next()) + { + const auto detached_table_name = detached_tables_it->table(); + + if (!detached_tables.contains(detached_table_name)) + continue; + + if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, detached_table_name)) + continue; + + fillResultColumnsByDetachedTableIterator(result_columns); + 
++rows_count; + } + } + + if (databases->size() == database_idx && (!detached_tables_it || !detached_tables_it->isValid())) + { + done = true; + } + const UInt64 num_rows = result_columns.at(0)->size(); + return Chunk(std::move(result_columns), num_rows); + } + +private: + const std::vector columns_mask; + const UInt64 max_block_size; + const ColumnPtr databases; + NameSet detached_tables; + DatabaseDetachedTablesSnapshotIteratorPtr detached_tables_it; + ContextPtr context; + bool done = false; + DatabasePtr database; + std::string database_name; + + void fillResultColumnsByDetachedTableIterator(MutableColumns & result_columns) const + { + size_t src_index = 0; + size_t res_index = 0; + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->database()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->table()); + + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->uuid()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->metadataPath()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->isPermanently()); + } +}; + +} + +class ReadFromSystemDetachedTables : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemDetachedTables"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromSystemDetachedTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::vector columns_mask_, + size_t max_block_size_); + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::vector columns_mask; + size_t max_block_size; + + ColumnPtr filtered_databases_column; + ColumnPtr filtered_tables_column; +}; + +StorageSystemDetachedTables::StorageSystemDetachedTables(const StorageID & table_id_) : IStorage(table_id_) +{ + StorageInMemoryMetadata storage_metadata; + + auto description = ColumnsDescription{ + ColumnDescription{"database", std::make_shared(), "The name of the database the table is in."}, + ColumnDescription{"table", std::make_shared(), "Table name."}, + ColumnDescription{"uuid", std::make_shared(), "Table uuid (Atomic database)."}, + ColumnDescription{"metadata_path", std::make_shared(), "Path to the table metadata in the file system."}, + ColumnDescription{"is_permanently", std::make_shared(), "Table was detached permanently."}, + }; + + storage_metadata.setColumns(std::move(description)); + + setInMemoryMetadata(storage_metadata); +} + +void StorageSystemDetachedTables::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum /*processed_stage*/, + const size_t max_block_size, + size_t /*num_streams*/) +{ + storage_snapshot->check(column_names); + auto sample_block = storage_snapshot->metadata->getSampleBlock(); + + auto [columns_mask, res_block] = getQueriedColumnsMaskAndHeader(sample_block, column_names); + + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, context, std::move(res_block), std::move(columns_mask), max_block_size); + + query_plan.addStep(std::move(reading)); +} + +ReadFromSystemDetachedTables::ReadFromSystemDetachedTables( + const Names & column_names_, + 
const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::vector columns_mask_, + size_t max_block_size_) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) +{ +} + +void ReadFromSystemDetachedTables::applyFilters(ActionDAGNodes added_filter_nodes) +{ + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + + filtered_databases_column = detail::getFilteredDatabases(predicate, context); + filtered_tables_column = detail::getFilteredTables(predicate, filtered_databases_column, context, true); +} + +void ReadFromSystemDetachedTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + auto pipe = Pipe(std::make_shared( + std::move(columns_mask), + getOutputStream().header, + max_block_size, + std::move(filtered_databases_column), + std::move(filtered_tables_column), + context)); + pipeline.init(std::move(pipe)); +} +} diff --git a/src/Storages/System/StorageSystemDetachedTables.h b/src/Storages/System/StorageSystemDetachedTables.h new file mode 100644 index 00000000000..cd042f51eaa --- /dev/null +++ b/src/Storages/System/StorageSystemDetachedTables.h @@ -0,0 +1,32 @@ +#pragma once + +#include + + +namespace DB +{ + +class Context; + +/** Implements the system table `detached_tables`, which allows you to get information about detached tables. + */ +class StorageSystemDetachedTables final : public IStorage +{ +public: + explicit StorageSystemDetachedTables(const StorageID & table_id_); + + std::string getName() const override { return "SystemDetachedTables"; } + + void read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & /*query_info*/, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + bool isSystemStorage() const override { return true; } +}; +} diff --git a/src/Storages/System/StorageSystemErrors.cpp b/src/Storages/System/StorageSystemErrors.cpp index 2da268305f8..2955aebeb7e 100644 --- a/src/Storages/System/StorageSystemErrors.cpp +++ b/src/Storages/System/StorageSystemErrors.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/System/StorageSystemEvents.cpp b/src/Storages/System/StorageSystemEvents.cpp index 822d5c77788..f74c316e3fc 100644 --- a/src/Storages/System/StorageSystemEvents.cpp +++ b/src/Storages/System/StorageSystemEvents.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemFunctions.cpp b/src/Storages/System/StorageSystemFunctions.cpp index 6e4ac8b2747..f10ce9e3987 100644 --- a/src/Storages/System/StorageSystemFunctions.cpp +++ b/src/Storages/System/StorageSystemFunctions.cpp @@ -16,16 +16,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int DICTIONARIES_WAS_NOT_LOADED; - extern const int FUNCTION_NOT_ALLOWED; - extern const int NOT_IMPLEMENTED; - extern const int SUPPORT_IS_DISABLED; - extern const int ACCESS_DENIED; - extern const int DEPRECATED_FUNCTION; -}; - enum class FunctionOrigin : int8_t { SYSTEM = 0, @@ -40,7 +30,6 @@ namespace MutableColumns & res_columns, const String & name, 
UInt64 is_aggregate, - std::optional is_deterministic, const String & create_query, FunctionOrigin function_origin, const Factory & factory) @@ -48,58 +37,53 @@ namespace res_columns[0]->insert(name); res_columns[1]->insert(is_aggregate); - if (!is_deterministic.has_value()) - res_columns[2]->insertDefault(); - else - res_columns[2]->insert(*is_deterministic); - if constexpr (std::is_same_v || std::is_same_v) { - res_columns[3]->insert(false); - res_columns[4]->insertDefault(); + res_columns[2]->insert(false); + res_columns[3]->insertDefault(); } else { - res_columns[3]->insert(factory.isCaseInsensitive(name)); + res_columns[2]->insert(factory.isCaseInsensitive(name)); if (factory.isAlias(name)) - res_columns[4]->insert(factory.aliasTo(name)); + res_columns[3]->insert(factory.aliasTo(name)); else - res_columns[4]->insertDefault(); + res_columns[3]->insertDefault(); } - res_columns[5]->insert(create_query); - res_columns[6]->insert(static_cast(function_origin)); + res_columns[4]->insert(create_query); + res_columns[5]->insert(static_cast(function_origin)); if constexpr (std::is_same_v) { if (factory.isAlias(name)) { + res_columns[6]->insertDefault(); res_columns[7]->insertDefault(); res_columns[8]->insertDefault(); res_columns[9]->insertDefault(); res_columns[10]->insertDefault(); res_columns[11]->insertDefault(); - res_columns[12]->insertDefault(); } else { auto documentation = factory.getDocumentation(name); - res_columns[7]->insert(documentation.description); - res_columns[8]->insert(documentation.syntax); - res_columns[9]->insert(documentation.argumentsAsString()); - res_columns[10]->insert(documentation.returned_value); - res_columns[11]->insert(documentation.examplesAsString()); - res_columns[12]->insert(documentation.categoriesAsString()); + res_columns[6]->insert(documentation.description); + res_columns[7]->insert(documentation.syntax); + res_columns[8]->insert(documentation.argumentsAsString()); + res_columns[9]->insert(documentation.returned_value); + res_columns[10]->insert(documentation.examplesAsString()); + res_columns[11]->insert(documentation.categoriesAsString()); } } else { + res_columns[6]->insertDefault(); res_columns[7]->insertDefault(); res_columns[8]->insertDefault(); res_columns[9]->insertDefault(); res_columns[10]->insertDefault(); res_columns[11]->insertDefault(); - res_columns[12]->insertDefault(); } } } @@ -120,7 +104,6 @@ ColumnsDescription StorageSystemFunctions::getColumnsDescription() { {"name", std::make_shared(), "The name of the function."}, {"is_aggregate", std::make_shared(), "Whether the function is an aggregate function."}, - {"is_deterministic", std::make_shared(std::make_shared()), "Whether the function is deterministic."}, {"case_insensitive", std::make_shared(), "Whether the function name can be used case-insensitively."}, {"alias_to", std::make_shared(), "The original function name, if the function name is an alias."}, {"create_query", std::make_shared(), "Obsolete."}, @@ -140,36 +123,14 @@ void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr c const auto & function_names = functions_factory.getAllRegisteredNames(); for (const auto & function_name : function_names) { - std::optional is_deterministic; - try - { - DO_NOT_UPDATE_ERROR_STATISTICS(); - is_deterministic = functions_factory.tryGet(function_name, context)->isDeterministic(); - } - catch (const Exception & e) - { - /// Some functions throw because they need special configuration or setup before use. 
- if (e.code() == ErrorCodes::DICTIONARIES_WAS_NOT_LOADED - || e.code() == ErrorCodes::FUNCTION_NOT_ALLOWED - || e.code() == ErrorCodes::NOT_IMPLEMENTED - || e.code() == ErrorCodes::SUPPORT_IS_DISABLED - || e.code() == ErrorCodes::ACCESS_DENIED - || e.code() == ErrorCodes::DEPRECATED_FUNCTION) - { - /// Ignore exception, show is_deterministic = NULL. - } - else - throw; - } - - fillRow(res_columns, function_name, 0, is_deterministic, "", FunctionOrigin::SYSTEM, functions_factory); + fillRow(res_columns, function_name, 0, "", FunctionOrigin::SYSTEM, functions_factory); } const auto & aggregate_functions_factory = AggregateFunctionFactory::instance(); const auto & aggregate_function_names = aggregate_functions_factory.getAllRegisteredNames(); for (const auto & function_name : aggregate_function_names) { - fillRow(res_columns, function_name, 1, {1}, "", FunctionOrigin::SYSTEM, aggregate_functions_factory); + fillRow(res_columns, function_name, 1, "", FunctionOrigin::SYSTEM, aggregate_functions_factory); } const auto & user_defined_sql_functions_factory = UserDefinedSQLFunctionFactory::instance(); @@ -177,14 +138,14 @@ void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr c for (const auto & function_name : user_defined_sql_functions_names) { auto create_query = queryToString(user_defined_sql_functions_factory.get(function_name)); - fillRow(res_columns, function_name, 0, {0}, create_query, FunctionOrigin::SQL_USER_DEFINED, user_defined_sql_functions_factory); + fillRow(res_columns, function_name, 0, create_query, FunctionOrigin::SQL_USER_DEFINED, user_defined_sql_functions_factory); } const auto & user_defined_executable_functions_factory = UserDefinedExecutableFunctionFactory::instance(); const auto & user_defined_executable_functions_names = user_defined_executable_functions_factory.getRegisteredNames(context); /// NOLINT(readability-static-accessed-through-instance) for (const auto & function_name : user_defined_executable_functions_names) { - fillRow(res_columns, function_name, 0, {0}, "", FunctionOrigin::EXECUTABLE_USER_DEFINED, user_defined_executable_functions_factory); + fillRow(res_columns, function_name, 0, "", FunctionOrigin::EXECUTABLE_USER_DEFINED, user_defined_executable_functions_factory); } } diff --git a/src/Storages/System/StorageSystemMergeTreeSettings.cpp b/src/Storages/System/StorageSystemMergeTreeSettings.cpp index 7781e3789a4..18730010e53 100644 --- a/src/Storages/System/StorageSystemMergeTreeSettings.cpp +++ b/src/Storages/System/StorageSystemMergeTreeSettings.cpp @@ -1,7 +1,9 @@ +#include #include #include #include #include +#include #include #include diff --git a/src/Storages/System/StorageSystemMergeTreeSettings.h b/src/Storages/System/StorageSystemMergeTreeSettings.h index e2913a7e55b..4d4150dcec2 100644 --- a/src/Storages/System/StorageSystemMergeTreeSettings.h +++ b/src/Storages/System/StorageSystemMergeTreeSettings.h @@ -1,6 +1,5 @@ #pragma once -#include #include diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index be945162b39..8671fd850f8 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index a5c496db7e7..00b4824cc3d 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ 
b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemSettings.cpp b/src/Storages/System/StorageSystemSettings.cpp index b437108b00e..f1e9d542fec 100644 --- a/src/Storages/System/StorageSystemSettings.cpp +++ b/src/Storages/System/StorageSystemSettings.cpp @@ -1,4 +1,6 @@ #include + +#include #include #include #include diff --git a/src/Storages/System/StorageSystemSettingsProfileElements.cpp b/src/Storages/System/StorageSystemSettingsProfileElements.cpp index 2af3e6dfd05..40a669351b1 100644 --- a/src/Storages/System/StorageSystemSettingsProfileElements.cpp +++ b/src/Storages/System/StorageSystemSettingsProfileElements.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index ba7433fb9ae..cc221bbea69 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 783b899c978..049a442d494 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -1,29 +1,30 @@ +#include + +#include #include -#include +#include #include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include -#include #include #include #include #include #include -#include +#include +#include +#include +#include +#include +#include #include @@ -31,6 +32,105 @@ namespace DB { +namespace +{ + +/// Avoid heavy operation on tables if we only queried columns that we can get without table object. +/// Otherwise it will require table initialization for Lazy database. 
+bool needTable(const DatabasePtr & database, const Block & header) +{ + if (database->getEngineName() != "Lazy") + return true; + + static const std::set columns_without_table = {"database", "name", "uuid", "metadata_modification_time"}; + for (const auto & column : header.getColumnsWithTypeAndName()) + { + if (columns_without_table.find(column.name) == columns_without_table.end()) + return true; + } + return false; +} +} + +namespace detail +{ +ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) +{ + MutableColumnPtr column = ColumnString::create(); + + const auto databases = DatabaseCatalog::instance().getDatabases(); + for (const auto & database_name : databases | boost::adaptors::map_keys) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + continue; /// We don't want to show the internal database for temporary tables in system.tables + + column->insert(database_name); + } + + Block block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "database")}; + VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); + return block.getByPosition(0).column; +} + +ColumnPtr getFilteredTables( + const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context, const bool is_detached) +{ + Block sample{ + ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), + ColumnWithTypeAndName(nullptr, std::make_shared(), "engine")}; + + MutableColumnPtr database_column = ColumnString::create(); + MutableColumnPtr engine_column; + + auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); + if (dag) + { + bool filter_by_engine = false; + for (const auto * input : dag->getInputs()) + if (input->result_name == "engine") + filter_by_engine = true; + + if (filter_by_engine) + engine_column = ColumnString::create(); + } + + for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) + { + const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); + DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); + if (!database) + continue; + + if (is_detached) + { + auto table_it = database->getDetachedTablesIterator(context, {}, false); + for (; table_it->isValid(); table_it->next()) + { + database_column->insert(table_it->table()); + } + } + else + { + for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next()) + { + database_column->insert(table_it->name()); + if (engine_column) + engine_column->insert(table_it->table()->getName()); + } + } + } + + Block block{ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; + if (engine_column) + block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); + + if (dag) + VirtualColumnUtils::filterBlockWithDAG(dag, block, context); + + return block.getByPosition(0).column; +} + +} StorageSystemTables::StorageSystemTables(const StorageID & table_id_) : IStorage(table_id_) @@ -105,92 +205,6 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) setInMemoryMetadata(storage_metadata); } - -namespace -{ - -ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) -{ - MutableColumnPtr column = ColumnString::create(); - - const auto databases = DatabaseCatalog::instance().getDatabases(); - for (const auto & database_name : databases | boost::adaptors::map_keys) - { - if (database_name == 
DatabaseCatalog::TEMPORARY_DATABASE) - continue; /// We don't want to show the internal database for temporary tables in system.tables - - column->insert(database_name); - } - - Block block { ColumnWithTypeAndName(std::move(column), std::make_shared(), "database") }; - VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); - return block.getByPosition(0).column; -} - -ColumnPtr getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context) -{ - Block sample { - ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), - ColumnWithTypeAndName(nullptr, std::make_shared(), "engine") - }; - - MutableColumnPtr database_column = ColumnString::create(); - MutableColumnPtr engine_column; - - auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); - if (dag) - { - bool filter_by_engine = false; - for (const auto * input : dag->getInputs()) - if (input->result_name == "engine") - filter_by_engine = true; - - if (filter_by_engine) - engine_column = ColumnString::create(); - } - - for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) - { - const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); - DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (!database) - continue; - - for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next()) - { - database_column->insert(table_it->name()); - if (engine_column) - engine_column->insert(table_it->table()->getName()); - } - } - - Block block {ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; - if (engine_column) - block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); - - if (dag) - VirtualColumnUtils::filterBlockWithDAG(dag, block, context); - - return block.getByPosition(0).column; -} - -/// Avoid heavy operation on tables if we only queried columns that we can get without table object. -/// Otherwise it will require table initialization for Lazy database. 
-bool needTable(const DatabasePtr & database, const Block & header) -{ - if (database->getEngineName() != "Lazy") - return true; - - static const std::set columns_without_table = { "database", "name", "uuid", "metadata_modification_time" }; - for (const auto & column : header.getColumnsWithTypeAndName()) - { - if (columns_without_table.find(column.name) == columns_without_table.end()) - return true; - } - return false; -} - - class TablesBlockSource : public ISource { public: @@ -470,7 +484,8 @@ protected: if (ast_create && !context->getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) { ast_create->uuid = UUIDHelpers::Nil; - ast_create->to_inner_uuid = UUIDHelpers::Nil; + if (ast_create->targets) + ast_create->targets->resetInnerUUIDs(); } if (columns_mask[src_index++]) @@ -690,8 +705,6 @@ private: std::string database_name; }; -} - class ReadFromSystemTables : public SourceStepWithFilter { public: @@ -756,8 +769,8 @@ void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes) if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); - filtered_databases_column = getFilteredDatabases(predicate, context); - filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context); + filtered_databases_column = detail::getFilteredDatabases(predicate, context); + filtered_tables_column = detail::getFilteredTables(predicate, filtered_databases_column, context, false); } void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) diff --git a/src/Storages/System/StorageSystemTables.h b/src/Storages/System/StorageSystemTables.h index 87cdf1b6a8e..d6e3996b8e3 100644 --- a/src/Storages/System/StorageSystemTables.h +++ b/src/Storages/System/StorageSystemTables.h @@ -8,6 +8,15 @@ namespace DB class Context; +namespace detail +{ + +ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context); +ColumnPtr +getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context, bool is_detached); + +} + /** Implements the system table `tables`, which allows you to get information about all tables. 
*/ diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index 541bd852140..b4d83058c82 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -18,8 +18,12 @@ #include #include #include -#include "base/types.h" + #include + +#include +#include + #include diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index cb46cd19517..81d3b0fe659 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -119,7 +120,7 @@ public: ZooKeeperSink(const Block & header, ContextPtr context) : SinkToStorage(header), zookeeper(context->getZooKeeper()) { } String getName() const override { return "ZooKeeperSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t rows = block.rows(); diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 6ff86b26ca9..97eda1db3fa 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attachNoDescription(context, system_database, "zeros_mt", "Multithreaded version of system.zeros.", true); attach(context, system_database, "databases", "Lists all databases of the current server."); attachNoDescription(context, system_database, "tables", "Lists all tables of the current server."); + attachNoDescription(context, system_database, "detached_tables", "Lists all detached tables of the current server."); attachNoDescription(context, system_database, "columns", "Lists all columns from all tables of the current server."); attach(context, system_database, "functions", "Contains a list of all available ordinary and aggregate functions with their descriptions."); attach(context, system_database, "events", "Contains profiling events and their current value."); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index f831465277d..ac091e7cf3c 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 27c52124e9c..151079154b1 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -271,7 +271,8 @@ bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( const ActionsDAG::Node * node, const Block * allowed_inputs, - ActionsDAG::Nodes & additional_nodes) + ActionsDAG::Nodes & additional_nodes, + bool allow_non_deterministic_functions) { if (node->type == ActionsDAG::ActionType::FUNCTION) { @@ -280,8 +281,14 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto & node_copy = additional_nodes.emplace_back(*node); node_copy.children.clear(); for (const auto * child : node->children) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes)) + if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, 
allow_non_deterministic_functions)) node_copy.children.push_back(child_copy); + /// Expression like (now_allowed AND allowed) is not allowed if allow_non_deterministic_functions = true. This is important for + /// trivial count optimization, otherwise we can get incorrect results. For example, if the query is + /// SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1, we cannot apply + /// trivial count. + else if (!allow_non_deterministic_functions) + return nullptr; if (node_copy.children.empty()) return nullptr; @@ -307,7 +314,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( { auto & node_copy = additional_nodes.emplace_back(*node); for (auto & child : node_copy.children) - if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes); !child) + if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions); !child) return nullptr; return &node_copy; @@ -321,7 +328,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto index_hint_dag = index_hint->getActions()->clone(); ActionsDAG::NodeRawConstPtrs atoms; for (const auto & output : index_hint_dag->getOutputs()) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes)) + if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_non_deterministic_functions)) atoms.push_back(child_copy); if (!atoms.empty()) @@ -355,13 +362,13 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( return node; } -ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs) +ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions) { if (!predicate) return nullptr; ActionsDAG::Nodes additional_nodes; - const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes); + const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes, allow_non_deterministic_functions); if (!res) return nullptr; @@ -370,7 +377,7 @@ ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context) { - auto dag = splitFilterDagForAllowedInputs(predicate, &block); + auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_non_deterministic_functions=*/ false); if (dag) filterBlockWithDAG(dag, block, context); } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 9045a2f5481..e5cfa47c8f6 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -32,7 +32,15 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); /// Extract a part of predicate that can be evaluated using only columns from input_names. -ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs); +/// When allow_non_deterministic_functions is true then even if the predicate contains non-deterministic +/// functions, we still allow to extract a part of the predicate, otherwise we return nullptr. 
+/// allow_non_deterministic_functions must be false when we are going to use the result to filter parts in +/// MergeTreeData::totalRowsByPartitionPredicateImp. For example, if the query is +/// `SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1` +/// The predicate will be `_partition_id = '0' AND rowNumberInBlock() = 1`, and `rowNumberInBlock()` is +/// non-deterministic. If we still extract the part `_partition_id = '0'` for filtering parts, then trivial +/// count optimization will be mistakenly applied to the query. +ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions = true); /// Extract from the input stream a set of `name` column values template diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 77e6ee9cb24..a39dd4e8f5f 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ #include #include #include +#include #include #include @@ -304,7 +306,7 @@ namespace public: explicit AddingAggregatedChunkInfoTransform(Block header) : ISimpleTransform(header, header, false) { } - void transform(Chunk & chunk) override { chunk.setChunkInfo(std::make_shared()); } + void transform(Chunk & chunk) override { chunk.getChunkInfos().add(std::make_shared()); } String getName() const override { return "AddingAggregatedChunkInfoTransform"; } }; @@ -689,7 +691,13 @@ inline void StorageWindowView::fire(UInt32 watermark) StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); - InterpreterInsertQuery interpreter(insert, getContext()); + InterpreterInsertQuery interpreter( + insert, + getContext(), + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_insert */ false); auto block_io = interpreter.execute(); auto pipe = Pipe(std::make_shared(blocks, header)); @@ -1201,8 +1209,11 @@ StorageWindowView::StorageWindowView( setInMemoryMetadata(storage_metadata); /// If the target table is not set, use inner target table - has_inner_target_table = query.to_table_id.empty(); - if (has_inner_target_table && !query.storage) + auto to_table_id = query.getTargetTableID(ViewTarget::To); + has_inner_target_table = to_table_id.empty(); + auto to_table_engine = query.getTargetInnerEngine(ViewTarget::To); + + if (has_inner_target_table && !to_table_engine) throw Exception(ErrorCodes::INCORRECT_QUERY, "You must specify where to save results of a WindowView query: " "either ENGINE or an existing table in a TO clause"); @@ -1217,12 +1228,12 @@ StorageWindowView::StorageWindowView( auto inner_query = initInnerQuery(query.select->list_of_selects->children.at(0)->as(), context_); - if (query.inner_storage) - inner_table_engine = query.inner_storage->clone(); + if (auto inner_storage = query.getTargetInnerEngine(ViewTarget::Inner)) + inner_table_engine = inner_storage->clone(); inner_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID())); inner_fetch_query = generateInnerFetchQuery(inner_table_id); - target_table_id = has_inner_target_table ?
StorageID(table_id_.database_name, generateTargetTableName(table_id_)) : to_table_id; if (is_proctime) next_fire_signal = getWindowUpperBound(now()); @@ -1247,7 +1258,7 @@ StorageWindowView::StorageWindowView( new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); target_create_query->set(target_create_query->columns_list, new_columns_list); - target_create_query->set(target_create_query->storage, query.storage->ptr()); + target_create_query->set(target_create_query->storage, to_table_engine); InterpreterCreateQuery create_interpreter_(target_create_query, create_context_); create_interpreter_.setInternal(true); @@ -1413,7 +1424,7 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } void StorageWindowView::writeIntoWindowView( - StorageWindowView & window_view, const Block & block, ContextPtr local_context) + StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) { window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) @@ -1428,7 +1439,7 @@ void StorageWindowView::writeIntoWindowView( window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); } - Pipe pipe(std::make_shared(block.cloneEmpty(), Chunk(block.getColumns(), block.rows()))); + Pipe pipe(std::make_shared(block)); UInt32 lateness_bound = 0; UInt32 t_max_watermark = 0; @@ -1473,10 +1484,10 @@ void StorageWindowView::writeIntoWindowView( auto syntax_result = TreeRewriter(local_context).analyze(query, columns); auto filter_expression = ExpressionAnalyzer(filter_function, syntax_result, local_context).getActionsDAG(false); - pipe.addSimpleTransform([&](const Block & header) + pipe.addSimpleTransform([&](const Block & header_) { return std::make_shared( - header, std::make_shared(filter_expression), + header_, std::make_shared(filter_expression), filter_function->getColumnName(), true); }); } @@ -1531,6 +1542,30 @@ void StorageWindowView::writeIntoWindowView( QueryProcessingStage::WithMergeableState); builder = select_block.buildQueryPipeline(); + + builder.addSimpleTransform([&](const Block & stream_header) + { + // Can't move chunk_infos here, that function could be called several times + return std::make_shared(chunk_infos.clone(), stream_header); + }); + + String window_view_id = window_view.getStorageID().hasUUID() ? 
toString(window_view.getStorageID().uuid) : window_view.getStorageID().getFullNameNotQuoted(); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(window_view_id, stream_header); + }); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header); + }); + +#ifdef ABORT_ON_LOGICAL_ERROR + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: After tmp table before squashing", stream_header); + }); +#endif + builder.addSimpleTransform([&](const Block & current_header) { return std::make_shared( @@ -1570,6 +1605,13 @@ void StorageWindowView::writeIntoWindowView( lateness_upper_bound); }); +#ifdef ABORT_ON_LOGICAL_ERROR + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: After WatermarkTransform", stream_header); + }); +#endif + auto inner_table = window_view.getInnerTable(); auto lock = inner_table->lockForShare( local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); @@ -1586,9 +1628,16 @@ void StorageWindowView::writeIntoWindowView( auto convert_actions = std::make_shared( convert_actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - builder.addSimpleTransform([&](const Block & header) { return std::make_shared(header, convert_actions); }); + builder.addSimpleTransform([&](const Block & header_) { return std::make_shared(header_, convert_actions); }); } +#ifdef ABORT_ON_LOGICAL_ERROR + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: Before out", stream_header); + }); +#endif + builder.addChain(Chain(std::move(output))); builder.setSinks([&](const Block & cur_header, Pipe::StreamType) { diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index f79867df424..14ac65091d3 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -166,7 +166,7 @@ public: BlockIO populate(); - static void writeIntoWindowView(StorageWindowView & window_view, const Block & block, ContextPtr context); + static void writeIntoWindowView(StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 84ba92bba00..69e8f0a7880 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 6ea7bdc312d..56071abaa95 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -1,4 +1,5 @@ #include "getStructureOfRemoteTable.h" +#include #include #include #include diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index fc85bde11d9..251470a17a8 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -419,7 +420,7 @@ String transformQueryForExternalDatabase( throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "No
column names for query '{}' to external table '{}.{}'", query_info.query_tree->formatASTForErrorMessage(), database, table); - auto clone_query = getASTForExternalDatabaseFromQueryTree(query_info.query_tree); + auto clone_query = getASTForExternalDatabaseFromQueryTree(query_info.query_tree, query_info.table_expression); return transformQueryForExternalDatabaseImpl( clone_query, diff --git a/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp b/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp index 5e0bfdd5f2a..a7f56999d73 100644 --- a/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp +++ b/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -10,7 +11,7 @@ #include #include #include - +#include #include @@ -20,6 +21,7 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; + extern const int LOGICAL_ERROR; } namespace @@ -55,7 +57,7 @@ public: } -ASTPtr getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tree) +ASTPtr getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tree, const QueryTreeNodePtr & table_expression) { auto new_tree = query_tree->clone(); @@ -63,6 +65,18 @@ ASTPtr getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tre visitor.visit(new_tree); const auto * query_node = new_tree->as(); + const auto & join_tree = query_node->getJoinTree(); + bool allow_where = true; + if (const auto * join_node = join_tree->as()) + { + if (join_node->getKind() == JoinKind::Left) + allow_where = join_node->getLeftTableExpression()->isEqual(*table_expression); + else if (join_node->getKind() == JoinKind::Right) + allow_where = join_node->getRightTableExpression()->isEqual(*table_expression); + else + allow_where = (join_node->getKind() == JoinKind::Inner); + } + auto query_node_ast = query_node->toAST({ .add_cast_for_constants = false, .fully_qualified_identifiers = false }); const IAST * ast = query_node_ast.get(); @@ -76,7 +90,13 @@ ASTPtr getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tre if (union_ast->list_of_selects->children.size() != 1) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "QueryNode AST is not a single ASTSelectQuery, got {}", union_ast->list_of_selects->children.size()); - return union_ast->list_of_selects->children.at(0); + ASTPtr select_query = union_ast->list_of_selects->children.at(0); + auto * select_query_typed = select_query->as(); + if (!select_query_typed) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ASTSelectQuery, got {}", select_query ? 
select_query->formatForErrorMessage() : "nullptr"); + if (!allow_where) + select_query_typed->setExpression(ASTSelectQuery::Expression::WHERE, nullptr); + return select_query; } } diff --git a/src/Storages/transformQueryForExternalDatabaseAnalyzer.h b/src/Storages/transformQueryForExternalDatabaseAnalyzer.h index f8983619d1f..7d8bf99646b 100644 --- a/src/Storages/transformQueryForExternalDatabaseAnalyzer.h +++ b/src/Storages/transformQueryForExternalDatabaseAnalyzer.h @@ -6,6 +6,6 @@ namespace DB { -ASTPtr getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tree); +ASTPtr getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tree, const QueryTreeNodePtr & table_expression); } diff --git a/src/TableFunctions/ITableFunctionXDBC.cpp b/src/TableFunctions/ITableFunctionXDBC.cpp index a5c16b3a5aa..83d0ac4ef41 100644 --- a/src/TableFunctions/ITableFunctionXDBC.cpp +++ b/src/TableFunctions/ITableFunctionXDBC.cpp @@ -1,8 +1,10 @@ +#include +#include #include -#include -#include -#include #include +#include +#include +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionDictionary.cpp b/src/TableFunctions/TableFunctionDictionary.cpp index 867fbf5b11e..9f0a7ac91b6 100644 --- a/src/TableFunctions/TableFunctionDictionary.cpp +++ b/src/TableFunctions/TableFunctionDictionary.cpp @@ -74,13 +74,12 @@ ColumnsDescription TableFunctionDictionary::getActualTableStructure(ContextPtr c /// otherwise, we get table structure by dictionary structure. auto dictionary_structure = external_loader.getDictionaryStructure(dictionary_name, context); - return ColumnsDescription(StorageDictionary::getNamesAndTypes(dictionary_structure)); + return ColumnsDescription(StorageDictionary::getNamesAndTypes(dictionary_structure, false)); } StoragePtr TableFunctionDictionary::executeImpl( const ASTPtr &, ContextPtr context, const std::string & table_name, ColumnsDescription, bool is_insert_query) const { - context->checkAccess(AccessType::dictGet, getDatabaseName(), table_name); StorageID dict_id(getDatabaseName(), table_name); auto dictionary_table_structure = getActualTableStructure(context, is_insert_query); diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index df2835dd630..552b9fde986 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionFactory.cpp b/src/TableFunctions/TableFunctionFactory.cpp index ce3daff0785..e505535ae76 100644 --- a/src/TableFunctions/TableFunctionFactory.cpp +++ b/src/TableFunctions/TableFunctionFactory.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -18,17 +19,17 @@ namespace ErrorCodes } void TableFunctionFactory::registerFunction( - const std::string & name, Value value, CaseSensitiveness case_sensitiveness) + const std::string & name, Value value, Case case_sensitiveness) { if (!table_functions.emplace(name, value).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "TableFunctionFactory: the table function name '{}' is not unique", name); - if (case_sensitiveness == CaseInsensitive + if (case_sensitiveness == Case::Insensitive && !case_insensitive_table_functions.emplace(Poco::toLower(name), value).second) throw Exception(ErrorCodes::LOGICAL_ERROR, "TableFunctionFactory: " "the case insensitive table function name '{}' is not unique", name); - KnownTableFunctionNames::instance().add(name, (case_sensitiveness 
== CaseInsensitive)); + KnownTableFunctionNames::instance().add(name, (case_sensitiveness == Case::Insensitive)); } TableFunctionPtr TableFunctionFactory::get( diff --git a/src/TableFunctions/TableFunctionFactory.h b/src/TableFunctions/TableFunctionFactory.h index 2cc648ba181..adc74c2e735 100644 --- a/src/TableFunctions/TableFunctionFactory.h +++ b/src/TableFunctions/TableFunctionFactory.h @@ -48,10 +48,10 @@ public: void registerFunction( const std::string & name, Value value, - CaseSensitiveness case_sensitiveness = CaseSensitive); + Case case_sensitiveness = Case::Sensitive); template - void registerFunction(TableFunctionProperties properties = {}, CaseSensitiveness case_sensitiveness = CaseSensitive) + void registerFunction(TableFunctionProperties properties = {}, Case case_sensitiveness = Case::Sensitive) { auto creator = []() -> TableFunctionPtr { return std::make_shared(); }; registerFunction(Function::name, diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 28bf72e07fb..1b6d86f8fa5 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -4,6 +4,7 @@ #include "registerTableFunctions.h" #include +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionFileCluster.cpp b/src/TableFunctions/TableFunctionFileCluster.cpp index 3e53349b022..514fa3143d0 100644 --- a/src/TableFunctions/TableFunctionFileCluster.cpp +++ b/src/TableFunctions/TableFunctionFileCluster.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp index ad2a142a140..7e4fdea1ff3 100644 --- a/src/TableFunctions/TableFunctionFormat.cpp +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include @@ -217,7 +219,7 @@ Result: void registerTableFunctionFormat(TableFunctionFactory & factory) { - factory.registerFunction({format_table_function_documentation, false}, TableFunctionFactory::CaseInsensitive); + factory.registerFunction({format_table_function_documentation, false}, TableFunctionFactory::Case::Insensitive); } } diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index b027cf42880..ffd318f36f7 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -3,6 +3,8 @@ #if USE_MYSQL #include + +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionPostgreSQL.cpp b/src/TableFunctions/TableFunctionPostgreSQL.cpp index 508f85df6a3..d690da9949a 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.cpp +++ b/src/TableFunctions/TableFunctionPostgreSQL.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index d7774bb6478..e60c31b2d77 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "registerTableFunctions.h" diff --git a/src/TableFunctions/TableFunctionValues.cpp b/src/TableFunctions/TableFunctionValues.cpp index 4b56fa57091..95c531f8a3b 100644 --- a/src/TableFunctions/TableFunctionValues.cpp +++ b/src/TableFunctions/TableFunctionValues.cpp @@ -174,7 +174,7 @@ StoragePtr TableFunctionValues::executeImpl(const ASTPtr & ast_function, Context void registerTableFunctionValues(TableFunctionFactory & 
factory) { - factory.registerFunction({.documentation = {}, .allow_readonly = true}, TableFunctionFactory::CaseInsensitive); + factory.registerFunction({.documentation = {}, .allow_readonly = true}, TableFunctionFactory::Case::Insensitive); } } diff --git a/src/TableFunctions/TableFunctionView.cpp b/src/TableFunctions/TableFunctionView.cpp index 2a50fb2d006..57501df6d4d 100644 --- a/src/TableFunctions/TableFunctionView.cpp +++ b/src/TableFunctions/TableFunctionView.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionViewIfPermitted.cpp b/src/TableFunctions/TableFunctionViewIfPermitted.cpp index b1691803988..935be6c1987 100644 --- a/src/TableFunctions/TableFunctionViewIfPermitted.cpp +++ b/src/TableFunctions/TableFunctionViewIfPermitted.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/tests/ci/.gitignore b/tests/ci/.gitignore index bc407a1f961..b5e58611040 100644 --- a/tests/ci/.gitignore +++ b/tests/ci/.gitignore @@ -1,4 +1 @@ -*_lambda/lambda-venv -*_lambda/lambda-package -*_lambda/lambda-package.zip gh_cache \ No newline at end of file diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py index 1a062d05a23..86dcaf79854 100644 --- a/tests/ci/artifactory.py +++ b/tests/ci/artifactory.py @@ -3,8 +3,13 @@ import time from pathlib import Path from typing import Optional from shutil import copy2 -from create_release import PackageDownloader, ReleaseInfo, ShellRunner -from ci_utils import WithIter +from create_release import ( + PackageDownloader, + ReleaseInfo, + ReleaseContextManager, + ReleaseProgress, +) +from ci_utils import WithIter, Shell class MountPointApp(metaclass=WithIter): @@ -38,7 +43,6 @@ class R2MountPoint: self.bucket_name = self._PROD_BUCKET_NAME self.aux_mount_options = "" - self.async_mount = False if self.app == MountPointApp.S3FS: self.cache_dir = "/home/ubuntu/s3fs_cache" # self.aux_mount_options += "-o nomodtime " if self.NOMODTIME else "" not for s3fs @@ -52,7 +56,6 @@ class R2MountPoint: self.mount_cmd = f"s3fs {self.bucket_name} {self.MOUNT_POINT} -o url={self.API_ENDPOINT} -o use_path_request_style -o umask=0000 -o nomultipart -o logfile={self.LOG_FILE} {self.aux_mount_options}" elif self.app == MountPointApp.RCLONE: # run rclone mount process asynchronously, otherwise subprocess.run(daemonized command) will not return - self.async_mount = True self.cache_dir = "/home/ubuntu/rclone_cache" self.aux_mount_options += "--no-modtime " if self.NOMODTIME else "" self.aux_mount_options += "-v " if self.DEBUG else "" # -vv too verbose @@ -76,19 +79,22 @@ class R2MountPoint: ) _TEST_MOUNT_CMD = f"mount | grep -q {self.MOUNT_POINT}" - ShellRunner.run(_CLEAN_LOG_FILE_CMD) - ShellRunner.run(_UNMOUNT_CMD) - ShellRunner.run(_MKDIR_CMD) - ShellRunner.run(_MKDIR_FOR_CACHE) - ShellRunner.run(self.mount_cmd, async_=self.async_mount) - if self.async_mount: - time.sleep(3) - ShellRunner.run(_TEST_MOUNT_CMD) + Shell.run(_CLEAN_LOG_FILE_CMD) + Shell.run(_UNMOUNT_CMD) + Shell.run(_MKDIR_CMD) + Shell.run(_MKDIR_FOR_CACHE) + if self.app == MountPointApp.S3FS: + Shell.run(self.mount_cmd, check=True) + else: + # didn't manage to use simple run() and without blocking or failure + Shell.run_as_daemon(self.mount_cmd) + time.sleep(3) + Shell.run(_TEST_MOUNT_CMD, check=True) @classmethod def teardown(cls): print(f"Unmount [{cls.MOUNT_POINT}]") - ShellRunner.run(f"umount {cls.MOUNT_POINT}") + Shell.run(f"umount {cls.MOUNT_POINT}") class RepoCodenames(metaclass=WithIter): @@ -101,6 +107,7 @@ class DebianArtifactory: 
_PROD_REPO_URL = "https://packages.clickhouse.com/deb" def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.release_info = release_info self.codename = release_info.codename self.version = release_info.version if dry_run: @@ -124,8 +131,8 @@ class DebianArtifactory: cmd = f"{REPREPRO_CMD_PREFIX} includedeb {self.codename} {' '.join(paths)}" print("Running export command:") print(f" {cmd}") - ShellRunner.run(cmd) - ShellRunner.run("sync") + Shell.run(cmd, check=True) + Shell.run("sync") if self.codename == RepoCodenames.LTS: packages_with_version = [ @@ -137,16 +144,19 @@ class DebianArtifactory: cmd = f"{REPREPRO_CMD_PREFIX} copy {RepoCodenames.STABLE} {RepoCodenames.LTS} {' '.join(packages_with_version)}" print("Running copy command:") print(f" {cmd}") - ShellRunner.run(cmd) - ShellRunner.run("sync") + Shell.run(cmd, check=True) + Shell.run("sync") def test_packages(self): - ShellRunner.run("docker pull ubuntu:latest") + Shell.run("docker pull ubuntu:latest") print(f"Test packages installation, version [{self.version}]") - cmd = f"docker run --rm ubuntu:latest bash -c \"apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; echo 'deb {self.repo_url} stable main' | tee /etc/apt/sources.list.d/clickhouse.list; apt update -y; apt-get install -y clickhouse-client={self.version}\"" + debian_command = f"echo 'deb {self.repo_url} stable main' | tee /etc/apt/sources.list.d/clickhouse.list; apt update -y; apt-get install -y clickhouse-common-static={self.version} clickhouse-client={self.version}" + cmd = f'docker run --rm ubuntu:latest bash -c "apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; {debian_command}"' print("Running test command:") print(f" {cmd}") - ShellRunner.run(cmd) + Shell.run(cmd, check=True) + self.release_info.debian_command = debian_command + self.release_info.dump() def _copy_if_not_exists(src: Path, dst: Path) -> Path: @@ -167,6 +177,7 @@ class RpmArtifactory: _SIGN_KEY = "885E2BDCF96B0B45ABF058453E4AD4719DDE9A38" def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.release_info = release_info self.codename = release_info.codename self.version = release_info.version if dry_run: @@ -202,23 +213,26 @@ class RpmArtifactory: for command in commands: print("Running command:") print(f" {command}") - ShellRunner.run(command) + Shell.run(command, check=True) update_public_key = f"gpg --armor --export {self._SIGN_KEY}" pub_key_path = dest_dir / "repodata" / "repomd.xml.key" print("Updating repomd.xml.key") - pub_key_path.write_text(ShellRunner.run(update_public_key)[1]) + pub_key_path.write_text(Shell.run(update_public_key, check=True)) if codename == RepoCodenames.LTS: self.export_packages(RepoCodenames.STABLE) - ShellRunner.run("sync") + Shell.run("sync") def test_packages(self): - ShellRunner.run("docker pull fedora:latest") + Shell.run("docker pull fedora:latest") print(f"Test package installation, version [{self.version}]") - cmd = f'docker run --rm fedora:latest /bin/bash -c "dnf -y install dnf-plugins-core && dnf config-manager --add-repo={self.repo_url} && dnf makecache && dnf -y install clickhouse-client-{self.version}-1"' + rpm_command = f"dnf config-manager --add-repo={self.repo_url} && dnf makecache && dnf -y install clickhouse-client-{self.version}-1" + cmd = f'docker run --rm fedora:latest /bin/bash -c "dnf -y install dnf-plugins-core && dnf config-manager 
--add-repo={self.repo_url} && {rpm_command}"' print("Running test command:") print(f" {cmd}") - ShellRunner.run(cmd) + Shell.run(cmd, check=True) + self.release_info.rpm_command = rpm_command + self.release_info.dump() class TgzArtifactory: @@ -226,6 +240,7 @@ class TgzArtifactory: _PROD_REPO_URL = "https://packages.clickhouse.com/tgz" def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.release_info = release_info self.codename = release_info.codename self.version = release_info.version if dry_run: @@ -256,23 +271,28 @@ class TgzArtifactory: if codename == RepoCodenames.LTS: self.export_packages(RepoCodenames.STABLE) - ShellRunner.run("sync") + Shell.run("sync") def test_packages(self): tgz_file = "/tmp/tmp.tgz" tgz_sha_file = "/tmp/tmp.tgz.sha512" - ShellRunner.run( - f"curl -o {tgz_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz" + cmd = f"curl -o {tgz_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz" + Shell.run( + cmd, + check=True, ) - ShellRunner.run( - f"curl -o {tgz_sha_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz.sha512" + Shell.run( + f"curl -o {tgz_sha_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz.sha512", + check=True, ) - expected_checksum = ShellRunner.run(f"cut -d ' ' -f 1 {tgz_sha_file}") - actual_checksum = ShellRunner.run(f"sha512sum {tgz_file} | cut -d ' ' -f 1") + expected_checksum = Shell.run(f"cut -d ' ' -f 1 {tgz_sha_file}", check=True) + actual_checksum = Shell.run(f"sha512sum {tgz_file} | cut -d ' ' -f 1") assert ( expected_checksum == actual_checksum ), f"[{actual_checksum} != {expected_checksum}]" - ShellRunner.run("rm /tmp/tmp.tgz*") + Shell.run("rm /tmp/tmp.tgz*") + self.release_info.tgz_command = cmd + self.release_info.dump() def parse_args() -> argparse.Namespace: @@ -280,12 +300,6 @@ def parse_args() -> argparse.Namespace: formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Adds release packages to the repository", ) - parser.add_argument( - "--infile", - type=str, - required=True, - help="input file with release info", - ) parser.add_argument( "--export-debian", action="store_true", @@ -326,9 +340,7 @@ def parse_args() -> argparse.Namespace: if __name__ == "__main__": args = parse_args() - assert args.dry_run - release_info = ReleaseInfo.from_file(args.infile) """ Use S3FS. 
RCLONE has some errors with r2 remote which I didn't figure out how to resolve: ERROR : IO error: NotImplemented: versionId not implemented @@ -336,20 +348,38 @@ if __name__ == "__main__": """ mp = R2MountPoint(MountPointApp.S3FS, dry_run=args.dry_run) if args.export_debian: - mp.init() - DebianArtifactory(release_info, dry_run=args.dry_run).export_packages() - mp.teardown() + with ReleaseContextManager( + release_progress=ReleaseProgress.EXPORT_DEB + ) as release_info: + mp.init() + DebianArtifactory(release_info, dry_run=args.dry_run).export_packages() + mp.teardown() if args.export_rpm: - mp.init() - RpmArtifactory(release_info, dry_run=args.dry_run).export_packages() - mp.teardown() + with ReleaseContextManager( + release_progress=ReleaseProgress.EXPORT_RPM + ) as release_info: + mp.init() + RpmArtifactory(release_info, dry_run=args.dry_run).export_packages() + mp.teardown() if args.export_tgz: - mp.init() - TgzArtifactory(release_info, dry_run=args.dry_run).export_packages() - mp.teardown() + with ReleaseContextManager( + release_progress=ReleaseProgress.EXPORT_TGZ + ) as release_info: + mp.init() + TgzArtifactory(release_info, dry_run=args.dry_run).export_packages() + mp.teardown() if args.test_debian: - DebianArtifactory(release_info, dry_run=args.dry_run).test_packages() + with ReleaseContextManager( + release_progress=ReleaseProgress.TEST_DEB + ) as release_info: + DebianArtifactory(release_info, dry_run=args.dry_run).test_packages() if args.test_tgz: - TgzArtifactory(release_info, dry_run=args.dry_run).test_packages() + with ReleaseContextManager( + release_progress=ReleaseProgress.TEST_TGZ + ) as release_info: + TgzArtifactory(release_info, dry_run=args.dry_run).test_packages() if args.test_rpm: - RpmArtifactory(release_info, dry_run=args.dry_run).test_packages() + with ReleaseContextManager( + release_progress=ReleaseProgress.TEST_RPM + ) as release_info: + RpmArtifactory(release_info, dry_run=args.dry_run).test_packages() diff --git a/tests/ci/auto_release.py b/tests/ci/auto_release.py index bfd19b11e6d..f2386fe207f 100644 --- a/tests/ci/auto_release.py +++ b/tests/ci/auto_release.py @@ -1,17 +1,17 @@ import argparse -from datetime import timedelta, datetime -import logging +import dataclasses +import json import os -from commit_status_helper import get_commit_filtered_statuses +import sys +from typing import List + from get_robot_token import get_best_robot_token from github_helper import GitHub -from release import Release, Repo as ReleaseRepo, RELEASE_READY_STATUS +from ci_utils import Shell +from env_helper import GITHUB_REPOSITORY from report import SUCCESS -from ssh import SSHKey - -LOGGER_NAME = __name__ -HELPER_LOGGERS = ["github_helper", LOGGER_NAME] -logger = logging.getLogger(LOGGER_NAME) +from ci_buddy import CIBuddy +from ci_config import CI def parse_args(): @@ -21,120 +21,198 @@ def parse_args(): ) parser.add_argument("--token", help="GitHub token, if not set, used from smm") parser.add_argument( - "--repo", default="ClickHouse/ClickHouse", help="Repo owner/name" - ) - parser.add_argument("--dry-run", action="store_true", help="Do not create anything") - parser.add_argument( - "--release-after-days", - type=int, - default=3, - help="Do automatic release on the latest green commit after the latest " - "release if the newest release is older than the specified days", - ) - parser.add_argument( - "--debug-helpers", + "--post-status", action="store_true", - help="Add debug logging for this script and github_helper", + help="Post release branch statuses", ) 
parser.add_argument( - "--remote-protocol", - "-p", - default="ssh", - choices=ReleaseRepo.VALID, - help="repo protocol for git commands remote, 'origin' is a special case and " - "uses 'origin' as a remote", + "--post-auto-release-complete", + action="store_true", + help="Post autorelease completion status", ) + parser.add_argument( + "--prepare", + action="store_true", + help="Prepare autorelease info", + ) + parser.add_argument( + "--wf-status", + type=str, + default="", + help="overall workflow status [success|failure]", + ) + return parser.parse_args(), parser - return parser.parse_args() + +MAX_NUMBER_OF_COMMITS_TO_CONSIDER_FOR_RELEASE = 5 +AUTORELEASE_INFO_FILE = "/tmp/autorelease_info.json" + + +@dataclasses.dataclass +class ReleaseParams: + ready: bool + ci_status: str + num_patches: int + release_branch: str + commit_sha: str + commits_to_branch_head: int + latest: bool + + def to_dict(self): + return dataclasses.asdict(self) + + +@dataclasses.dataclass +class AutoReleaseInfo: + releases: List[ReleaseParams] + + def add_release(self, release_params: ReleaseParams) -> None: + self.releases.append(release_params) + + def dump(self): + print(f"Dump release info into [{AUTORELEASE_INFO_FILE}]") + with open(AUTORELEASE_INFO_FILE, "w", encoding="utf-8") as f: + print(json.dumps(dataclasses.asdict(self), indent=2), file=f) + + @staticmethod + def from_file() -> "AutoReleaseInfo": + with open(AUTORELEASE_INFO_FILE, "r", encoding="utf-8") as json_file: + res = json.load(json_file) + releases = [ReleaseParams(**release) for release in res["releases"]] + return AutoReleaseInfo(releases=releases) + + +def _prepare(token): + assert len(token) > 10 + os.environ["GH_TOKEN"] = token + Shell.run("gh auth status", check=True) + + gh = GitHub(token) + prs = gh.get_release_pulls(GITHUB_REPOSITORY) + prs.sort(key=lambda x: x.head.ref) + branch_names = [pr.head.ref for pr in prs] + print(f"Found release branches [{branch_names}]") + + repo = gh.get_repo(GITHUB_REPOSITORY) + autoRelease_info = AutoReleaseInfo(releases=[]) + + for pr in prs: + print(f"\nChecking PR [{pr.head.ref}]") + + refs = list(repo.get_git_matching_refs(f"tags/v{pr.head.ref}")) + assert refs + + refs.sort(key=lambda ref: ref.ref) + latest_release_tag_ref = refs[-1] + latest_release_tag = repo.get_git_tag(latest_release_tag_ref.object.sha) + + commits = Shell.run( + f"git rev-list --first-parent {latest_release_tag.tag}..origin/{pr.head.ref}", + check=True, + ).split("\n") + commit_num = len(commits) + print( + f"Previous release [{latest_release_tag.tag}] was [{commit_num}] commits ago, date [{latest_release_tag.tagger.date}]" + ) + + commits_to_check = commits[:-1] # Exclude the version bump commit + commit_sha = "" + commit_ci_status = "" + commits_to_branch_head = 0 + + for idx, commit in enumerate( + commits_to_check[:MAX_NUMBER_OF_COMMITS_TO_CONSIDER_FOR_RELEASE] + ): + print( + f"Check commit [{commit}] [{pr.head.ref}~{idx+1}] as release candidate" + ) + commit_num -= 1 + + is_completed = CI.GHActions.check_wf_completed( + token=token, commit_sha=commit + ) + if not is_completed: + print(f"CI is in progress for [{commit}] - check previous commit") + commits_to_branch_head += 1 + continue + + commit_ci_status = CI.GHActions.get_commit_status_by_name( + token=token, + commit_sha=commit, + status_name=(CI.JobNames.BUILD_CHECK, "ClickHouse build check"), + ) + commit_sha = commit + if commit_ci_status == SUCCESS: + break + + print(f"CI status [{commit_ci_status}] - skip") + commits_to_branch_head += 1 + + ready = False + if 
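# Usage sketch for the AutoReleaseInfo/ReleaseParams dataclasses introduced
# above: dump() serialises the nested dataclasses to a JSON file via
# dataclasses.asdict, and from_file() rebuilds them by splatting each dict
# back into ReleaseParams. Mirrored here with trimmed stand-in classes so the
# round-trip is runnable on its own; file path and field values are made up.
import dataclasses
import json
from typing import List

INFO_FILE = "/tmp/autorelease_info_example.json"  # placeholder path

@dataclasses.dataclass
class ParamsStub:
    release_branch: str
    commit_sha: str
    ready: bool

@dataclasses.dataclass
class InfoStub:
    releases: List[ParamsStub]

    def dump(self) -> None:
        with open(INFO_FILE, "w", encoding="utf-8") as f:
            json.dump(dataclasses.asdict(self), f, indent=2)

    @staticmethod
    def from_file() -> "InfoStub":
        with open(INFO_FILE, "r", encoding="utf-8") as f:
            res = json.load(f)
        return InfoStub(releases=[ParamsStub(**r) for r in res["releases"]])

InfoStub(releases=[ParamsStub("24.6", "deadbeef", True)]).dump()
assert InfoStub.from_file().releases[0].ready is True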
commit_ci_status == SUCCESS and commit_sha: + print( + f"Add release ready info for commit [{commit_sha}] and release branch [{pr.head.ref}]" + ) + ready = True + else: + print(f"WARNING: No ready commits found for release branch [{pr.head.ref}]") + + autoRelease_info.add_release( + ReleaseParams( + release_branch=pr.head.ref, + commit_sha=commit_sha, + ready=ready, + ci_status=commit_ci_status, + num_patches=commit_num, + commits_to_branch_head=commits_to_branch_head, + latest=False, + ) + ) + + if autoRelease_info.releases: + autoRelease_info.releases[-1].latest = True + + autoRelease_info.dump() def main(): - args = parse_args() - logging.basicConfig(level=logging.INFO) - if args.debug_helpers: - for logger_name in HELPER_LOGGERS: - logging.getLogger(logger_name).setLevel(logging.DEBUG) + args, parser = parse_args() - token = args.token or get_best_robot_token() - days_as_timedelta = timedelta(days=args.release_after_days) - now = datetime.now() - - gh = GitHub(token) - prs = gh.get_release_pulls(args.repo) - branch_names = [pr.head.ref for pr in prs] - - logger.info("Found release branches: %s\n ", " \n".join(branch_names)) - repo = gh.get_repo(args.repo) - - # In general there is no guarantee on which order the refs/commits are - # returned from the API, so we have to order them. - for pr in prs: - logger.info("Checking PR %s", pr.head.ref) - - refs = list(repo.get_git_matching_refs(f"tags/v{pr.head.ref}")) - refs.sort(key=lambda ref: ref.ref) - - latest_release_tag_ref = refs[-1] - latest_release_tag = repo.get_git_tag(latest_release_tag_ref.object.sha) - logger.info("That last release was done at %s", latest_release_tag.tagger.date) - - if latest_release_tag.tagger.date + days_as_timedelta > now: - logger.info( - "Not enough days since the last release %s," - " no automatic release can be done", - latest_release_tag.tag, + if args.post_status: + info = AutoReleaseInfo.from_file() + for release_info in info.releases: + if release_info.ready: + CIBuddy(dry_run=False).post_info( + title=f"Auto Release Status for {release_info.release_branch}", + body=release_info.to_dict(), + ) + else: + CIBuddy(dry_run=False).post_warning( + title=f"Auto Release Status for {release_info.release_branch}", + body=release_info.to_dict(), + ) + elif args.post_auto_release_complete: + assert args.wf_status, "--wf-status Required with --post-auto-release-complete" + if args.wf_status != SUCCESS: + CIBuddy(dry_run=False).post_job_error( + error_description="Autorelease workflow failed", + job_name="Autorelease", + with_instance_info=False, + with_wf_link=True, + critical=True, ) - continue - - unreleased_commits = list( - repo.get_commits(sha=pr.head.ref, since=latest_release_tag.tagger.date) - ) - unreleased_commits.sort( - key=lambda commit: commit.commit.committer.date, reverse=True - ) - - for commit in unreleased_commits: - logger.info("Checking statuses of commit %s", commit.sha) - statuses = get_commit_filtered_statuses(commit) - all_success = all(st.state == SUCCESS for st in statuses) - passed_ready_for_release_check = any( - st.context == RELEASE_READY_STATUS and st.state == SUCCESS - for st in statuses + else: + CIBuddy(dry_run=False).post_info( + title=f"Autorelease completed", + body="", + with_wf_link=True, ) - if not (all_success and passed_ready_for_release_check): - logger.info("Commit is not green, thus not suitable for release") - continue - - logger.info("Commit is ready for release, let's release!") - - release = Release( - ReleaseRepo(args.repo, args.remote_protocol), - commit.sha, - 
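# Condensed sketch of the commit-selection loop in _prepare() above: walk the
# newest commits on a release branch, skip commits whose CI is still running,
# and stop at the first one whose build check succeeded. The get_status
# callback stands in for the CI.GHActions helpers; commit names and statuses
# below are made-up test data.
from typing import Callable, List, Optional, Tuple

def pick_release_candidate(
    commits: List[str],
    get_status: Callable[[str], Optional[str]],  # "success"/"failure", or None while CI runs
    max_to_consider: int = 5,
) -> Tuple[str, bool]:
    for commit in commits[:max_to_consider]:
        status = get_status(commit)
        if status is None:
            continue  # CI in progress - look at an older commit
        if status == "success":
            return commit, True
    return "", False

statuses = {"sha3": None, "sha2": "failure", "sha1": "success"}
sha, ready = pick_release_candidate(["sha3", "sha2", "sha1"], statuses.get)
assert (sha, ready) == ("sha1", True)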
"patch", - args.dry_run, - True, - ) - try: - release.do(True, True, True) - except: - if release.has_rollback: - logging.error( - "!!The release process finished with error, read the output carefully!!" - ) - logging.error( - "Probably, rollback finished with error. " - "If you don't see any of the following commands in the output, " - "execute them manually:" - ) - release.log_rollback() - raise - logging.info("New release is done!") - break + elif args.prepare: + _prepare(token=args.token or get_best_robot_token()) + else: + parser.print_help() + sys.exit(2) if __name__ == "__main__": - if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""): - with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"): - main() - else: - main() + main() diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py deleted file mode 100644 index 520abcb41bb..00000000000 --- a/tests/ci/autoscale_runners_lambda/app.py +++ /dev/null @@ -1,232 +0,0 @@ -#!/usr/bin/env python3 - -"""The lambda to decrease/increase ASG desired capacity based on current queue""" - -import logging -from dataclasses import dataclass -from pprint import pformat -from typing import Any, List, Literal, Optional, Tuple - -import boto3 # type: ignore -from lambda_shared import ( - RUNNER_TYPE_LABELS, - CHException, - ClickHouseHelper, - get_parameter_from_ssm, -) - -### Update comment on the change ### -# 4 HOUR - is a balance to get the most precise values -# - Our longest possible running check is around 5h on the worst scenario -# - The long queue won't be wiped out and replaced, so the measurmenet is fine -# - If the data is spoiled by something, we are from the bills perspective -# Changed it to 3 HOUR: in average we have 1h tasks, but p90 is around 2h. -# With 4h we have too much wasted computing time in case of issues with DB -QUEUE_QUERY = f"""SELECT - last_status AS status, - toUInt32(count()) AS length, - labels -FROM -( - SELECT - arraySort(groupArray(status))[-1] AS last_status, - labels, - id, - html_url - FROM default.workflow_jobs - WHERE has(labels, 'self-hosted') - AND hasAny({RUNNER_TYPE_LABELS}, labels) - AND started_at > now() - INTERVAL 3 HOUR - GROUP BY ALL - HAVING last_status IN ('in_progress', 'queued') -) -GROUP BY ALL -ORDER BY labels, last_status""" - - -@dataclass -class Queue: - status: Literal["in_progress", "queued"] - lentgh: int - label: str - - -def get_scales(runner_type: str) -> Tuple[int, int]: - "returns the multipliers for scaling down and up ASG by types" - # Scaling down is quicker on the lack of running jobs than scaling up on - # queue - - # The ASG should deflate almost instantly - scale_down = 1 - # the style checkers have so many noise, so it scales up too quickly - # The 5 was too quick, there are complainings regarding too slow with - # 10. I am trying 7 now. 
- # 7 still looks a bit slow, so I try 6 - # Let's have it the same as the other ASG - # - # All type of style-checkers should be added very quickly to not block the workflows - # UPDATE THE COMMENT ON CHANGES - scale_up = 3 - if "style" in runner_type: - scale_up = 1 - return scale_down, scale_up - - -CH_CLIENT = None # type: Optional[ClickHouseHelper] - - -def set_capacity( - runner_type: str, queues: List[Queue], client: Any, dry_run: bool = True -) -> None: - assert len(queues) in (1, 2) - assert all(q.label == runner_type for q in queues) - as_groups = client.describe_auto_scaling_groups( - Filters=[ - {"Name": "tag-key", "Values": ["github:runner-type"]}, - {"Name": "tag-value", "Values": [runner_type]}, - ] - )["AutoScalingGroups"] - assert len(as_groups) == 1 - asg = as_groups[0] - running = 0 - queued = 0 - for q in queues: - if q.status == "in_progress": - running = q.lentgh - continue - if q.status == "queued": - queued = q.lentgh - continue - raise ValueError("Queue status is not in ['in_progress', 'queued']") - - scale_down, scale_up = get_scales(runner_type) - # With lyfecycle hooks some instances are actually free because some of - # them are in 'Terminating:Wait' state - effective_capacity = max( - asg["DesiredCapacity"], - len([ins for ins in asg["Instances"] if ins["HealthStatus"] == "Healthy"]), - ) - - # How much nodes are free (positive) or need to be added (negative) - capacity_reserve = effective_capacity - running - queued - stop = False - if capacity_reserve < 0: - # This part is about scaling up - capacity_deficit = -capacity_reserve - # It looks that we are still OK, since no queued jobs exist - stop = stop or queued == 0 - # Are we already at the capacity limits - stop = stop or asg["MaxSize"] <= asg["DesiredCapacity"] - # Let's calculate a new desired capacity - # (capacity_deficit + scale_up - 1) // scale_up : will increase min by 1 - # if there is any capacity_deficit - new_capacity = ( - asg["DesiredCapacity"] + (capacity_deficit + scale_up - 1) // scale_up - ) - new_capacity = max(new_capacity, asg["MinSize"]) - new_capacity = min(new_capacity, asg["MaxSize"]) - # Finally, should the capacity be even changed - stop = stop or asg["DesiredCapacity"] == new_capacity - if stop: - logging.info( - "Do not increase ASG %s capacity, current capacity=%s, effective " - "capacity=%s, maximum capacity=%s, running jobs=%s, queue size=%s", - asg["AutoScalingGroupName"], - asg["DesiredCapacity"], - effective_capacity, - asg["MaxSize"], - running, - queued, - ) - return - - logging.info( - "The ASG %s capacity will be increased to %s, current capacity=%s, " - "effective capacity=%s, maximum capacity=%s, running jobs=%s, queue size=%s", - asg["AutoScalingGroupName"], - new_capacity, - asg["DesiredCapacity"], - effective_capacity, - asg["MaxSize"], - running, - queued, - ) - if not dry_run: - client.set_desired_capacity( - AutoScalingGroupName=asg["AutoScalingGroupName"], - DesiredCapacity=new_capacity, - ) - return - - # Now we will calculate if we need to scale down - stop = stop or asg["DesiredCapacity"] == asg["MinSize"] - new_capacity = asg["DesiredCapacity"] - (capacity_reserve // scale_down) - new_capacity = max(new_capacity, asg["MinSize"]) - new_capacity = min(new_capacity, asg["MaxSize"]) - stop = stop or asg["DesiredCapacity"] == new_capacity - if stop: - logging.info( - "Do not decrease ASG %s capacity, current capacity=%s, effective " - "capacity=%s, minimum capacity=%s, running jobs=%s, queue size=%s", - asg["AutoScalingGroupName"], - asg["DesiredCapacity"], - 
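# Worked sketch of the scale-up arithmetic in the autoscaler code removed
# above: the capacity deficit is divided by scale_up with ceiling division,
# so any non-zero deficit raises the desired capacity by at least one, and
# the result is clamped to the ASG bounds. The two asserts reproduce the
# "increase-1" and "increase-3" cases from the removed unit tests.
def new_desired_capacity(desired: int, deficit: int, scale_up: int,
                         min_size: int, max_size: int) -> int:
    capacity = desired + (deficit + scale_up - 1) // scale_up  # ceiling division
    return min(max(capacity, min_size), max_size)

assert new_desired_capacity(desired=13, deficit=10, scale_up=3, min_size=1, max_size=20) == 17
assert new_desired_capacity(desired=13, deficit=170, scale_up=3, min_size=1, max_size=20) == 20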
effective_capacity, - asg["MinSize"], - running, - queued, - ) - return - - logging.info( - "The ASG %s capacity will be decreased to %s, current capacity=%s, effective " - "capacity=%s, minimum capacity=%s, running jobs=%s, queue size=%s", - asg["AutoScalingGroupName"], - new_capacity, - asg["DesiredCapacity"], - effective_capacity, - asg["MinSize"], - running, - queued, - ) - if not dry_run: - client.set_desired_capacity( - AutoScalingGroupName=asg["AutoScalingGroupName"], - DesiredCapacity=new_capacity, - ) - - -def main(dry_run: bool = True) -> None: - logging.getLogger().setLevel(logging.INFO) - asg_client = boto3.client("autoscaling") - try: - global CH_CLIENT - CH_CLIENT = CH_CLIENT or ClickHouseHelper( - get_parameter_from_ssm("clickhouse-test-stat-url"), "play" - ) - queues = CH_CLIENT.select_json_each_row("default", QUEUE_QUERY) - except CHException as ex: - logging.exception( - "Got an exception on insert, tryuing to update the client " - "credentials and repeat", - exc_info=ex, - ) - CH_CLIENT = ClickHouseHelper( - get_parameter_from_ssm("clickhouse-test-stat-url"), "play" - ) - queues = CH_CLIENT.select_json_each_row("default", QUEUE_QUERY) - - logging.info("Received queue data:\n%s", pformat(queues, width=120)) - for runner_type in RUNNER_TYPE_LABELS: - runner_queues = [ - Queue(queue["status"], queue["length"], runner_type) - for queue in queues - if runner_type in queue["labels"] - ] - runner_queues = runner_queues or [Queue("in_progress", 0, runner_type)] - set_capacity(runner_type, runner_queues, asg_client, dry_run) - - -def handler(event: dict, context: Any) -> None: - _ = event - _ = context - return main(False) diff --git a/tests/ci/autoscale_runners_lambda/build_and_deploy_archive.sh b/tests/ci/autoscale_runners_lambda/build_and_deploy_archive.sh deleted file mode 120000 index 96ba3fa024e..00000000000 --- a/tests/ci/autoscale_runners_lambda/build_and_deploy_archive.sh +++ /dev/null @@ -1 +0,0 @@ -../team_keys_lambda/build_and_deploy_archive.sh \ No newline at end of file diff --git a/tests/ci/autoscale_runners_lambda/lambda_shared b/tests/ci/autoscale_runners_lambda/lambda_shared deleted file mode 120000 index ba86e090f6c..00000000000 --- a/tests/ci/autoscale_runners_lambda/lambda_shared +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/autoscale_runners_lambda/requirements.txt b/tests/ci/autoscale_runners_lambda/requirements.txt deleted file mode 100644 index 098e04a9798..00000000000 --- a/tests/ci/autoscale_runners_lambda/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package diff --git a/tests/ci/autoscale_runners_lambda/test_autoscale.py b/tests/ci/autoscale_runners_lambda/test_autoscale.py deleted file mode 100644 index 3aeab49ffc7..00000000000 --- a/tests/ci/autoscale_runners_lambda/test_autoscale.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env python - -import unittest -from dataclasses import dataclass -from typing import Any, List - -from app import Queue, set_capacity - - -@dataclass -class TestCase: - name: str - min_size: int - desired_capacity: int - max_size: int - queues: List[Queue] - expected_capacity: int - - -class TestSetCapacity(unittest.TestCase): - class FakeClient: - def __init__(self): - self._expected_data = {} # type: dict - self._expected_capacity = -1 - - @property - def expected_data(self) -> dict: - """a one-time property""" - data, self._expected_data = self._expected_data, {} - return data - - @expected_data.setter - def expected_data(self, value: 
dict) -> None: - self._expected_data = value - - @property - def expected_capacity(self) -> int: - """a one-time property""" - capacity, self._expected_capacity = self._expected_capacity, -1 - return capacity - - def describe_auto_scaling_groups(self, **kwargs: Any) -> dict: - _ = kwargs - return self.expected_data - - def set_desired_capacity(self, **kwargs: Any) -> None: - self._expected_capacity = kwargs["DesiredCapacity"] - - def data_helper( - self, name: str, min_size: int, desired_capacity: int, max_size: int - ) -> None: - self.expected_data = { - "AutoScalingGroups": [ - { - "AutoScalingGroupName": name, - "DesiredCapacity": desired_capacity, - "MinSize": min_size, - "MaxSize": max_size, - "Instances": [], # necessary for ins["HealthStatus"] check - } - ] - } - - def setUp(self): - self.client = self.FakeClient() - - def test_normal_cases(self): - test_cases = ( - # Do not change capacity - TestCase("noqueue", 1, 13, 20, [Queue("in_progress", 155, "noqueue")], -1), - TestCase("reserve", 1, 13, 20, [Queue("queued", 13, "reserve")], -1), - # Increase capacity - TestCase( - "increase-always", - 1, - 13, - 20, - [Queue("queued", 14, "increase-always")], - 14, - ), - TestCase("increase-1", 1, 13, 20, [Queue("queued", 23, "increase-1")], 17), - TestCase( - "style-checker", 1, 13, 20, [Queue("queued", 19, "style-checker")], 19 - ), - TestCase("increase-2", 1, 13, 20, [Queue("queued", 18, "increase-2")], 15), - TestCase("increase-3", 1, 13, 20, [Queue("queued", 183, "increase-3")], 20), - TestCase( - "increase-w/o reserve", - 1, - 13, - 20, - [ - Queue("in_progress", 11, "increase-w/o reserve"), - Queue("queued", 12, "increase-w/o reserve"), - ], - 17, - ), - TestCase("lower-min", 10, 5, 20, [Queue("queued", 5, "lower-min")], 10), - # Decrease capacity - TestCase("w/reserve", 1, 13, 20, [Queue("queued", 5, "w/reserve")], 5), - TestCase( - "style-checker", 1, 13, 20, [Queue("queued", 5, "style-checker")], 5 - ), - TestCase("w/reserve", 1, 23, 20, [Queue("queued", 17, "w/reserve")], 17), - TestCase("decrease", 1, 13, 20, [Queue("in_progress", 3, "decrease")], 3), - TestCase( - "style-checker", - 1, - 13, - 20, - [Queue("in_progress", 5, "style-checker")], - 5, - ), - ) - for t in test_cases: - self.client.data_helper(t.name, t.min_size, t.desired_capacity, t.max_size) - set_capacity(t.name, t.queues, self.client, False) - self.assertEqual(t.expected_capacity, self.client.expected_capacity, t.name) - - def test_effective_capacity(self): - """Normal cases test increasing w/o considering - effective_capacity much lower than DesiredCapacity""" - test_cases = ( - TestCase( - "desired-overwritten", - 1, - 20, # DesiredCapacity, overwritten by effective_capacity - 50, - [ - Queue("in_progress", 30, "desired-overwritten"), - Queue("queued", 60, "desired-overwritten"), - ], - 40, - ), - ) - for t in test_cases: - self.client.data_helper(t.name, t.min_size, t.desired_capacity, t.max_size) - # we test that effective_capacity is 30 (a half of 60) - data_with_instances = self.client.expected_data - data_with_instances["AutoScalingGroups"][0]["Instances"] = [ - {"HealthStatus": "Healthy" if i % 2 else "Unhealthy"} for i in range(60) - ] - self.client.expected_data = data_with_instances - set_capacity(t.name, t.queues, self.client, False) - self.assertEqual(t.expected_capacity, self.client.expected_capacity, t.name) - - def test_exceptions(self): - test_cases = ( - ( - TestCase( - "different names", - 1, - 1, - 1, - [Queue("queued", 5, "another name")], - -1, - ), - AssertionError, - ), - (TestCase("wrong 
queue len", 1, 1, 1, [], -1), AssertionError), - ( - TestCase( - "wrong queue", 1, 1, 1, [Queue("wrong", 1, "wrong queue")], -1 # type: ignore - ), - ValueError, - ), - ) - for t, error in test_cases: - with self.assertRaises(error): - self.client.data_helper( - t.name, t.min_size, t.desired_capacity, t.max_size - ) - set_capacity(t.name, t.queues, self.client, False) - - with self.assertRaises(AssertionError): - self.client.expected_data = {"AutoScalingGroups": [1, 2]} - set_capacity( - "wrong number of ASGs", - [Queue("queued", 1, "wrong number of ASGs")], - self.client, - ) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 39f34ed9ccf..77d91c8400b 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -12,7 +12,6 @@ import docker_images_helper from ci_config import CI from env_helper import REPO_COPY, S3_BUILDS_BUCKET, TEMP_PATH from git_helper import Git -from lambda_shared_package.lambda_shared.pr import Labels from pr_info import PRInfo from report import FAILURE, SUCCESS, JobReport, StatusType from stopwatch import Stopwatch @@ -108,7 +107,9 @@ def build_clickhouse( def is_release_pr(pr_info: PRInfo) -> bool: - return Labels.RELEASE in pr_info.labels or Labels.RELEASE_LTS in pr_info.labels + return ( + CI.Labels.RELEASE in pr_info.labels or CI.Labels.RELEASE_LTS in pr_info.labels + ) def get_release_or_pr(pr_info: PRInfo, version: ClickHouseVersion) -> Tuple[str, str]: diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py deleted file mode 100644 index 578ade5c8a0..00000000000 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ /dev/null @@ -1,376 +0,0 @@ -#!/usr/bin/env python3 - -import json -import time -from base64 import b64decode -from collections import namedtuple -from queue import Queue -from threading import Thread -from typing import Any, Dict, List, Optional - -import requests -from lambda_shared.pr import Labels -from lambda_shared.token import get_cached_access_token - -NEED_RERUN_OR_CANCELL_WORKFLOWS = { - "BackportPR", - "DocsCheck", - "MasterCI", - "PullRequestCI", -} - -MAX_RETRY = 5 - -DEBUG_INFO = {} # type: Dict[str, Any] - - -class Worker(Thread): - def __init__( - self, request_queue: Queue, token: str, ignore_exception: bool = False - ): - Thread.__init__(self) - self.queue = request_queue - self.token = token - self.ignore_exception = ignore_exception - self.response = {} # type: Dict - - def run(self): - m = self.queue.get() - try: - self.response = _exec_get_with_retry(m, self.token) - except Exception as e: - if not self.ignore_exception: - raise - print(f"Exception occured, still continue: {e}") - self.queue.task_done() - - -def _exec_get_with_retry(url: str, token: str) -> dict: - headers = {"Authorization": f"token {token}"} - e = Exception() - for i in range(MAX_RETRY): - try: - response = requests.get(url, headers=headers, timeout=30) - response.raise_for_status() - return response.json() # type: ignore - except Exception as ex: - print("Got exception executing request", ex) - e = ex - time.sleep(i + 1) - - raise requests.HTTPError("Cannot execute GET request with retries") from e - - -WorkflowDescription = namedtuple( - "WorkflowDescription", - [ - "url", - "run_id", - "name", - "head_sha", - "status", - "rerun_url", - "cancel_url", - "conclusion", - ], -) - - -def get_workflows_description_for_pull_request( - pull_request_event: dict, token: str -) -> List[WorkflowDescription]: - head_repo = pull_request_event["head"]["repo"]["full_name"] - 
head_branch = pull_request_event["head"]["ref"] - print("PR", pull_request_event["number"], "has head ref", head_branch) - - workflows_data = [] - repo_url = pull_request_event["base"]["repo"]["url"] - request_url = f"{repo_url}/actions/runs?per_page=100" - # Get all workflows for the current branch - for i in range(1, 11): - workflows = _exec_get_with_retry( - f"{request_url}&event=pull_request&branch={head_branch}&page={i}", token - ) - if not workflows["workflow_runs"]: - break - workflows_data += workflows["workflow_runs"] - if i == 10: - print("Too many workflows found") - - if not workflows_data: - print("No workflows found by filter") - return [] - - print(f"Total workflows for the branch {head_branch} found: {len(workflows_data)}") - - DEBUG_INFO["workflows"] = [] - workflow_descriptions = [] - for workflow in workflows_data: - # Some time workflow["head_repository"]["full_name"] is None - if workflow["head_repository"] is None: - continue - DEBUG_INFO["workflows"].append( - { - "full_name": workflow["head_repository"]["full_name"], - "name": workflow["name"], - "branch": workflow["head_branch"], - } - ) - # unfortunately we cannot filter workflows from forks in request to API - # so doing it manually - if ( - workflow["head_repository"]["full_name"] == head_repo - and workflow["name"] in NEED_RERUN_OR_CANCELL_WORKFLOWS - ): - workflow_descriptions.append( - WorkflowDescription( - url=workflow["url"], - run_id=workflow["id"], - name=workflow["name"], - head_sha=workflow["head_sha"], - status=workflow["status"], - rerun_url=workflow["rerun_url"], - cancel_url=workflow["cancel_url"], - conclusion=workflow["conclusion"], - ) - ) - - return workflow_descriptions - - -def get_workflow_description_fallback( - pull_request_event: dict, token: str -) -> List[WorkflowDescription]: - head_repo = pull_request_event["head"]["repo"]["full_name"] - head_branch = pull_request_event["head"]["ref"] - print("Get last 500 workflows from API to search related there") - # Fallback for a case of an already deleted branch and no workflows received - repo_url = pull_request_event["base"]["repo"]["url"] - request_url = f"{repo_url}/actions/runs?per_page=100" - q = Queue() # type: Queue - workers = [] - workflows_data = [] - i = 1 - for i in range(1, 6): - q.put(f"{request_url}&page={i}") - worker = Worker(q, token, True) - worker.start() - workers.append(worker) - - for worker in workers: - worker.join() - if not worker.response: - # We ignore get errors, so response can be empty - continue - # Prefilter workflows - workflows_data += [ - wf - for wf in worker.response["workflow_runs"] - if wf["head_repository"] is not None - and wf["head_repository"]["full_name"] == head_repo - and wf["head_branch"] == head_branch - and wf["name"] in NEED_RERUN_OR_CANCELL_WORKFLOWS - ] - - print(f"Total workflows in last 500 actions matches: {len(workflows_data)}") - - DEBUG_INFO["workflows"] = [ - { - "full_name": wf["head_repository"]["full_name"], - "name": wf["name"], - "branch": wf["head_branch"], - } - for wf in workflows_data - ] - - workflow_descriptions = [ - WorkflowDescription( - url=wf["url"], - run_id=wf["id"], - name=wf["name"], - head_sha=wf["head_sha"], - status=wf["status"], - rerun_url=wf["rerun_url"], - cancel_url=wf["cancel_url"], - conclusion=wf["conclusion"], - ) - for wf in workflows_data - ] - - return workflow_descriptions - - -def get_workflow_description(workflow_url: str, token: str) -> WorkflowDescription: - workflow = _exec_get_with_retry(workflow_url, token) - return WorkflowDescription( - 
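# The removed fallback above fans the per-page GitHub API requests out to
# Worker threads fed from a Queue. As a sketch, the same fan-out with the
# standard library's concurrent.futures (a deliberate substitution for
# brevity, not the lambda's actual code); fetch_page is a placeholder for
# the real paginated request.
from concurrent.futures import ThreadPoolExecutor
from typing import List

def fetch_page(page: int) -> List[dict]:
    # placeholder: would GET the runs URL with &page={page} and return workflow_runs
    return []

def fetch_last_pages(pages: int = 5) -> List[dict]:
    runs: List[dict] = []
    with ThreadPoolExecutor(max_workers=pages) as pool:
        for page_runs in pool.map(fetch_page, range(1, pages + 1)):
            runs += page_runs
    return runs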
url=workflow["url"], - run_id=workflow["id"], - name=workflow["name"], - head_sha=workflow["head_sha"], - status=workflow["status"], - rerun_url=workflow["rerun_url"], - cancel_url=workflow["cancel_url"], - conclusion=workflow["conclusion"], - ) - - -def _exec_post_with_retry(url: str, token: str, json: Optional[Any] = None) -> Any: - headers = {"Authorization": f"token {token}"} - e = Exception() - for i in range(MAX_RETRY): - try: - response = requests.post(url, headers=headers, json=json, timeout=30) - response.raise_for_status() - return response.json() - except Exception as ex: - print("Got exception executing request", ex) - e = ex - time.sleep(i + 1) - - raise requests.HTTPError("Cannot execute POST request with retry") from e - - -def exec_workflow_url(urls_to_post, token): - for url in urls_to_post: - print("Post for workflow workflow using url", url) - _exec_post_with_retry(url, token) - print("Workflow post finished") - - -def main(event): - token = get_cached_access_token() - DEBUG_INFO["event"] = event - if event["isBase64Encoded"]: - event_data = json.loads(b64decode(event["body"])) - else: - event_data = json.loads(event["body"]) - - print("Got event for PR", event_data["number"]) - action = event_data["action"] - print("Got action", event_data["action"]) - pull_request = event_data["pull_request"] - label = "" - if action == "labeled": - label = event_data["label"]["name"] - print("Added label:", label) - - print("PR has labels", {label["name"] for label in pull_request["labels"]}) - if action == "opened" or ( - action == "labeled" and pull_request["created_at"] == pull_request["updated_at"] - ): - print("Freshly opened PR, nothing to do") - return - - if action == "closed" or label == Labels.DO_NOT_TEST: - print("PR merged/closed or manually labeled 'do not test', will kill workflows") - workflow_descriptions = get_workflows_description_for_pull_request( - pull_request, token - ) - workflow_descriptions = ( - workflow_descriptions - or get_workflow_description_fallback(pull_request, token) - ) - urls_to_cancel = [] - for workflow_description in workflow_descriptions: - if ( - workflow_description.status != "completed" - and workflow_description.conclusion != "cancelled" - ): - urls_to_cancel.append(workflow_description.cancel_url) - print(f"Found {len(urls_to_cancel)} workflows to cancel") - exec_workflow_url(urls_to_cancel, token) - return - - if label == Labels.CAN_BE_TESTED: - print("PR marked with can be tested label, rerun workflow") - workflow_descriptions = get_workflows_description_for_pull_request( - pull_request, token - ) - workflow_descriptions = ( - workflow_descriptions - or get_workflow_description_fallback(pull_request, token) - ) - if not workflow_descriptions: - print("Not found any workflows") - return - - workflow_descriptions.sort(key=lambda x: x.run_id) # type: ignore - most_recent_workflow = workflow_descriptions[-1] - print("Latest workflow", most_recent_workflow) - if ( - most_recent_workflow.status != "completed" - and most_recent_workflow.conclusion != "cancelled" - ): - print("Latest workflow is not completed, cancelling") - exec_workflow_url([most_recent_workflow.cancel_url], token) - print("Cancelled") - - for _ in range(45): - # If the number of retries is changed: tune the lambda limits accordingly - latest_workflow_desc = get_workflow_description( - most_recent_workflow.url, token - ) - print("Checking latest workflow", latest_workflow_desc) - if latest_workflow_desc.status in ("completed", "cancelled"): - print("Finally latest workflow 
done, going to rerun") - exec_workflow_url([most_recent_workflow.rerun_url], token) - print("Rerun finished, exiting") - break - print("Still have strange status") - time.sleep(3) - return - - if action == "edited": - print("PR is edited - do nothing") - # error, _ = check_pr_description( - # pull_request["body"], pull_request["base"]["repo"]["full_name"] - # ) - # if error: - # print( - # f"The PR's body is wrong, is going to comment it. The error is: {error}" - # ) - # post_json = { - # "body": "This is an automatic comment. The PR descriptions does not " - # f"match the [template]({pull_request['base']['repo']['html_url']}/" - # "blob/master/.github/PULL_REQUEST_TEMPLATE.md?plain=1).\n\n" - # f"Please, edit it accordingly.\n\nThe error is: {error}" - # } - # _exec_post_with_retry(pull_request["comments_url"], token, json=post_json) - return - - if action == "synchronize": - print("PR is synchronized, going to stop old actions") - workflow_descriptions = get_workflows_description_for_pull_request( - pull_request, token - ) - workflow_descriptions = ( - workflow_descriptions - or get_workflow_description_fallback(pull_request, token) - ) - urls_to_cancel = [] - for workflow_description in workflow_descriptions: - if ( - workflow_description.status != "completed" - and workflow_description.conclusion != "cancelled" - and workflow_description.head_sha != pull_request["head"]["sha"] - ): - urls_to_cancel.append(workflow_description.cancel_url) - print(f"Found {len(urls_to_cancel)} workflows to cancel") - exec_workflow_url(urls_to_cancel, token) - return - - print("Nothing to do") - - -def handler(event, _): - try: - main(event) - - return { - "statusCode": 200, - "headers": {"Content-Type": "application/json"}, - "body": '{"status": "OK"}', - } - finally: - for name, value in DEBUG_INFO.items(): - print(f"Value of {name}: ", value) diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/build_and_deploy_archive.sh b/tests/ci/cancel_and_rerun_workflow_lambda/build_and_deploy_archive.sh deleted file mode 120000 index 96ba3fa024e..00000000000 --- a/tests/ci/cancel_and_rerun_workflow_lambda/build_and_deploy_archive.sh +++ /dev/null @@ -1 +0,0 @@ -../team_keys_lambda/build_and_deploy_archive.sh \ No newline at end of file diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/lambda_shared b/tests/ci/cancel_and_rerun_workflow_lambda/lambda_shared deleted file mode 120000 index ba86e090f6c..00000000000 --- a/tests/ci/cancel_and_rerun_workflow_lambda/lambda_shared +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt b/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt deleted file mode 100644 index 4cb3fba0f7b..00000000000 --- a/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package[token] diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index 459be12ada0..a7fc6d02853 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -38,7 +38,7 @@ from env_helper import TEMP_PATH from get_robot_token import get_best_robot_token from git_helper import GIT_PREFIX, git_runner, is_shallow from github_helper import GitHub, PullRequest, PullRequests, Repository -from lambda_shared_package.lambda_shared.pr import Labels +from ci_config import Labels from ssh import SSHKey diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 8dcf3fc4c69..171819e2632 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -48,7 +48,19 
@@ from git_helper import GIT_PREFIX, Git from git_helper import Runner as GitRunner from github_helper import GitHub from pr_info import PRInfo -from report import ERROR, FAILURE, PENDING, SUCCESS, BuildResult, JobReport, TestResult +from report import ( + ERROR, + FAILURE, + PENDING, + SUCCESS, + BuildResult, + JobReport, + TestResult, + OK, + JOB_STARTED_TEST_NAME, + JOB_FINISHED_TEST_NAME, + FAIL, +) from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen @@ -263,7 +275,8 @@ def check_missing_images_on_dockerhub( return result -def _pre_action(s3, indata, pr_info): +def _pre_action(s3, job_name, batch, indata, pr_info): + no_cache = CiSettings.create_from_run_config(indata).no_ci_cache print("Clear dmesg") Utils.clear_dmesg() CommitStatusData.cleanup() @@ -282,6 +295,90 @@ def _pre_action(s3, indata, pr_info): ci_cache.dump_run_config(indata) + to_be_skipped = False + skip_status = SUCCESS + # check if job was run already + if CI.is_build_job(job_name): + # this is a build job - check if a build report is present + build_result = ( + BuildResult.load_any(job_name, pr_info.number, pr_info.head_ref) + if not no_cache + else None + ) + if build_result: + if build_result.status == SUCCESS: + to_be_skipped = True + else: + print( + "Build report found but status is unsuccessful - will try to rerun" + ) + print("::group::Build Report") + print(build_result.as_json()) + print("::endgroup::") + else: + # this is a test job - check if GH commit status or cache record is present + commit = get_commit(GitHub(get_best_robot_token(), per_page=100), pr_info.sha) + # rerun helper check + # FIXME: Find a way to identify if job restarted manually (by developer) or by automatic workflow restart (died spot-instance) + # disable rerun check for the former + if job_name not in ( + CI.JobNames.BUILD_CHECK, + ): # we might want to rerun build report job + rerun_helper = RerunHelper(commit, _get_ext_check_name(job_name)) + if rerun_helper.is_already_finished_by_status(): + print("WARNING: Rerunning job with GH status ") + status = rerun_helper.get_finished_status() + assert status + print("::group::Commit Status") + print(status) + print("::endgroup::") + to_be_skipped = True + skip_status = status.state + + # ci cache check + if not to_be_skipped and not no_cache: + ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update() + job_config = CI.get_job_config(job_name) + if ci_cache.is_successful( + job_name, + batch, + job_config.num_batches, + job_config.required_on_release_branch, + ): + print("CICache record has be found - job will be skipped") + job_status = ci_cache.get_successful( + job_name, batch, job_config.num_batches + ) + assert job_status, "BUG" + _create_gh_status( + commit, + job_name, + batch, + job_config.num_batches, + job_status, + ) + to_be_skipped = True + # skip_status = SUCCESS already there + GHActions.print_in_group("Commit Status Data", job_status) + + # create pre report + jr = JobReport.create_pre_report(status=skip_status, job_skipped=to_be_skipped) + jr.dump() + + if not to_be_skipped: + print("push start record to ci db") + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + [TestResult(JOB_STARTED_TEST_NAME, OK)], + SUCCESS, + 0.0, + JobReport.get_start_time_from_current(), + "", + _get_ext_check_name(job_name), + ) + ClickHouseHelper().insert_events_into( + db="default", table="checks", events=prepared_events + ) print(f"Pre action done. 
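# Condensed sketch of the skip decision _pre_action now makes above: build
# jobs are skipped when a successful build report already exists; test jobs
# are skipped when a finished GitHub commit status or a successful CI-cache
# record is found. The three callables are stand-ins for BuildResult.load_any,
# the RerunHelper lookup and the CiCache query.
from typing import Callable, Optional, Tuple

def decide_skip(
    is_build_job: bool,
    no_cache: bool,
    load_build_status: Callable[[], Optional[str]],
    finished_gh_status: Callable[[], Optional[str]],
    cache_is_successful: Callable[[], bool],
) -> Tuple[bool, str]:
    """Returns (job_skipped, status_to_report); "success" mirrors SUCCESS."""
    if is_build_job:
        build_status = None if no_cache else load_build_status()
        return build_status == "success", "success"
    gh_status = finished_gh_status()
    if gh_status is not None:
        return True, gh_status
    if not no_cache and cache_is_successful():
        return True, "success"
    return False, "success"

# e.g. a test job with no finished GH status but a CI-cache hit is skipped:
assert decide_skip(False, False, lambda: None, lambda: None, lambda: True) == (True, "success")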
Report files [{reports_files}] have been downloaded") @@ -1013,13 +1110,14 @@ def main() -> int: ci_cache.print_status() if IS_CI and not pr_info.is_merge_queue: - # wait for pending jobs to be finished, await_jobs is a long blocking call - ci_cache.await_pending_jobs(pr_info.is_release) if pr_info.is_release: print("Release/master: CI Cache add pending records for all todo jobs") ci_cache.push_pending_all(pr_info.is_release) + # wait for pending jobs to be finished, await_jobs is a long blocking call + ci_cache.await_pending_jobs(pr_info.is_release) + # conclude results result["git_ref"] = git_ref result["version"] = version @@ -1045,99 +1143,23 @@ def main() -> int: ### PRE action: start elif args.pre: assert indata, "Run config must be provided via --infile" - _pre_action(s3, indata, pr_info) - JobReport.create_pre_report().dump() + _pre_action(s3, args.job_name, args.batch, indata, pr_info) ### RUN action: start elif args.run: assert indata - ci_settings = CiSettings.create_from_run_config(indata) + job_report = JobReport.load() check_name = args.job_name check_name_with_group = _get_ext_check_name(check_name) print( f"Check if rerun for name: [{check_name}], extended name [{check_name_with_group}]" ) - previous_status = None - if CI.is_build_job(check_name): - # this is a build job - check if a build report is present - build_result = ( - BuildResult.load_any(check_name, pr_info.number, pr_info.head_ref) - if not ci_settings.no_ci_cache - else None - ) - if build_result: - if build_result.status == SUCCESS: - previous_status = build_result.status - else: - # FIXME: Consider reusing failures for build jobs. - # Just remove this if/else - that makes build job starting and failing immediately - print( - "Build report found but status is unsuccessful - will try to rerun" - ) - print("::group::Build Report") - print(build_result.as_json()) - print("::endgroup::") - else: - # this is a test job - check if GH commit status or cache record is present - commit = get_commit( - GitHub(get_best_robot_token(), per_page=100), pr_info.sha - ) - # rerun helper check - # FIXME: Find a way to identify if job restarted manually (by developer) or by automatic workflow restart (died spot-instance) - # disable rerun check for the former - if check_name not in ( - CI.JobNames.BUILD_CHECK, - ): # we might want to rerun build report job - rerun_helper = RerunHelper(commit, check_name_with_group) - if rerun_helper.is_already_finished_by_status(): - print("WARNING: Rerunning job with GH status ") - status = rerun_helper.get_finished_status() - assert status - print("::group::Commit Status") - print(status) - print("::endgroup::") - previous_status = status.state - print("Create dummy job report with job_skipped flag") - JobReport( - status=status.state, - description="", - test_results=[], - start_time="", - duration=0.0, - additional_files=[], - job_skipped=True, - ).dump() - - # ci cache check - if not previous_status and not ci_settings.no_ci_cache: - ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update() - job_config = CI.get_job_config(check_name) - if ci_cache.is_successful( - check_name, - args.batch, - job_config.num_batches, - job_config.required_on_release_branch, - ): - job_status = ci_cache.get_successful( - check_name, args.batch, job_config.num_batches - ) - assert job_status, "BUG" - _create_gh_status( - commit, - check_name, - args.batch, - job_config.num_batches, - job_status, - ) - previous_status = job_status.status - GHActions.print_in_group("Commit Status Data", job_status) - - if 
previous_status and not args.force: + if job_report.job_skipped and not args.force: print( - f"Commit status or Build Report is already present - job will be skipped with status: [{previous_status}]" + f"Commit status or Build Report is already present - job will be skipped with status: [{job_report.status}]" ) - if previous_status == SUCCESS: + if job_report.status == SUCCESS: exit_code = 0 else: exit_code = 1 @@ -1157,7 +1179,8 @@ def main() -> int: assert ( job_report ), "BUG. There must be job report either real report, or pre-report if job was killed" - if not job_report.job_skipped and not job_report.pre_report: + error_description = "" + if not job_report.pre_report: # it's a real job report ch_helper = ClickHouseHelper() check_url = "" @@ -1235,7 +1258,6 @@ def main() -> int: pr_info, dump_to_file=True, ) - print(f"Job report url: [{check_url}]") prepared_events = prepare_tests_results_for_clickhouse( pr_info, @@ -1260,18 +1282,25 @@ def main() -> int: ) elif job_report.job_skipped: print(f"Skipped after rerun check {[args.job_name]} - do nothing") - elif job_report.job_skipped: - print(f"Job was skipped {[args.job_name]} - do nothing") - elif job_report.pre_report: + else: print(f"ERROR: Job was killed - generate evidence") job_report.update_duration() - # Job was killed! + ret_code = os.getenv("JOB_EXIT_CODE", "") + if ret_code: + try: + job_report.exit_code = int(ret_code) + except ValueError: + pass if Utils.is_killed_with_oom(): print("WARNING: OOM while job execution") - error = f"Out Of Memory, exit_code {job_report.exit_code}, after {job_report.duration}s" + print(subprocess.run("sudo dmesg -T", check=False)) + error_description = f"Out Of Memory, exit_code {job_report.exit_code}" else: - error = f"Unknown, exit_code {job_report.exit_code}, after {job_report.duration}s" - CIBuddy().post_error(error, job_name=_get_ext_check_name(args.job_name)) + error_description = f"Unknown, exit_code {job_report.exit_code}" + CIBuddy().post_job_error( + error_description + f" after {int(job_report.duration)}s", + job_name=_get_ext_check_name(args.job_name), + ) if CI.is_test_job(args.job_name): gh = GitHub(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) @@ -1279,11 +1308,32 @@ def main() -> int: commit, ERROR, "", - "Error: " + error, + "Error: " + error_description, _get_ext_check_name(args.job_name), pr_info, dump_to_file=True, ) + + if not job_report.job_skipped: + print("push finish record to ci db") + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + [ + TestResult( + JOB_FINISHED_TEST_NAME, + FAIL if error_description else OK, + raw_logs=error_description or None, + ) + ], + SUCCESS if not error_description else ERROR, + 0.0, + JobReport.get_start_time_from_current(), + "", + _get_ext_check_name(args.job_name), + ) + ClickHouseHelper().insert_events_into( + db="default", table="checks", events=prepared_events + ) ### POST action: end ### MARK SUCCESS action: start diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py index ea690bb602c..dfb5885270a 100644 --- a/tests/ci/ci_buddy.py +++ b/tests/ci/ci_buddy.py @@ -1,12 +1,14 @@ +import argparse import json import os +from typing import Union, Dict import boto3 import requests from botocore.exceptions import ClientError from pr_info import PRInfo -from ci_utils import Shell +from ci_utils import Shell, GHActions class CIBuddy: @@ -26,6 +28,13 @@ class CIBuddy: self.pr_number = pr_info.number self.head_ref = pr_info.head_ref self.commit_url = pr_info.commit_html_url + self.sha = 
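# Condensed sketch of the killed-job handling added above: take the exit code
# from the JOB_EXIT_CODE environment variable when it parses as an integer,
# classify the failure as OOM or unknown, and build the error description
# that is posted via CIBuddy. The oom_detected flag stands in for
# Utils.is_killed_with_oom().
import os
from typing import Optional

def describe_killed_job(oom_detected: bool, exit_code: Optional[int] = None) -> str:
    raw = os.getenv("JOB_EXIT_CODE", "")
    if raw:
        try:
            exit_code = int(raw)
        except ValueError:
            pass  # keep whatever exit code the job report already had
    reason = "Out Of Memory" if oom_detected else "Unknown"
    return f"{reason}, exit_code {exit_code}"

print(describe_killed_job(oom_detected=False))  # -> "Unknown, exit_code None" without the env var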
pr_info.sha[:10] + + def check_workflow(self): + GHActions.print_workflow_results() + res = GHActions.get_workflow_job_result(GHActions.ActionsNames.RunConfig) + if res != GHActions.ActionStatuses.SUCCESS: + self.post_job_error("Workflow Configuration Failed", critical=True) @staticmethod def _get_webhooks(): @@ -59,18 +68,70 @@ class CIBuddy: except Exception as e: print(f"ERROR: Failed to post message, ex {e}") - def post_error(self, error_description, job_name="", with_instance_info=True): + def _post_formatted( + self, title: str, body: Union[Dict, str], with_wf_link: bool + ) -> None: + message = title + if isinstance(body, dict): + for name, value in body.items(): + if "commit_sha" in name: + value = ( + f"" + ) + message += f" *{name}*: {value}\n" + else: + message += body + "\n" + run_id = os.getenv("GITHUB_RUN_ID", "") + if with_wf_link and run_id: + message += f" *workflow*: \n" + self.post(message) + + def post_info( + self, title: str, body: Union[Dict, str], with_wf_link: bool = True + ) -> None: + title_extended = f":white_circle: *{title}*\n\n" + self._post_formatted(title_extended, body, with_wf_link) + + def post_done( + self, title: str, body: Union[Dict, str], with_wf_link: bool = True + ) -> None: + title_extended = f":white_check_mark: *{title}*\n\n" + self._post_formatted(title_extended, body, with_wf_link) + + def post_warning( + self, title: str, body: Union[Dict, str], with_wf_link: bool = True + ) -> None: + title_extended = f":warning: *{title}*\n\n" + self._post_formatted(title_extended, body, with_wf_link) + + def post_critical( + self, title: str, body: Union[Dict, str], with_wf_link: bool = True + ) -> None: + title_extended = f":black_circle: *{title}*\n\n" + self._post_formatted(title_extended, body, with_wf_link) + + def post_job_error( + self, + error_description: str, + job_name: str = "", + with_instance_info: bool = True, + with_wf_link: bool = True, + critical: bool = False, + ) -> None: instance_id, instance_type = "unknown", "unknown" if with_instance_info: instance_id = Shell.run("ec2metadata --instance-id") or instance_id instance_type = Shell.run("ec2metadata --instance-type") or instance_type if not job_name: job_name = os.getenv("CHECK_NAME", "unknown") - line_err = f":red_circle: *Error: {error_description}*\n\n" + sign = ":red_circle:" if not critical else ":black_circle:" + line_err = f"{sign} *Error: {error_description}*\n\n" line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n" line_job = f" *Job:* `{job_name}`\n" - line_pr_ = f" *PR:* \n" - line_br_ = f" *Branch:* `{self.head_ref}`, <{self.commit_url}|commit>\n" + line_pr_ = f" *PR:* , <{self.commit_url}|{self.sha}>\n" + line_br_ = ( + f" *Branch:* `{self.head_ref}`, <{self.commit_url}|{self.sha}>\n" + ) message = line_err message += line_job if with_instance_info: @@ -79,10 +140,36 @@ class CIBuddy: message += line_pr_ else: message += line_br_ + run_id = os.getenv("GITHUB_RUN_ID", "") + if with_wf_link and run_id: + message += f" *workflow*: \n" self.post(message) +def parse_args(): + parser = argparse.ArgumentParser("CI Buddy bot notifies about CI events") + parser.add_argument( + "--check-wf-status", + action="store_true", + help="Checks workflow status", + ) + parser.add_argument( + "--test", + action="store_true", + help="for test and debug", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="dry run mode", + ) + return parser.parse_args(), parser + + if __name__ == "__main__": - # test - buddy = CIBuddy(dry_run=True) - buddy.post_error("Out of memory") 
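# Usage sketch for the CIBuddy helpers added above: post_info/post_warning/
# post_done/post_critical accept either a plain string or a dict body; dict
# keys containing "commit_sha" are rendered as commit links, and a workflow
# link is appended when GITHUB_RUN_ID is set. The import path and the field
# values are assumptions for illustration; dry_run=True keeps it offline.
from ci_buddy import CIBuddy  # assumed to resolve within tests/ci

buddy = CIBuddy(dry_run=True)
buddy.post_info(
    title="Auto Release Status for 24.6",
    body={
        "release_branch": "24.6",
        "commit_sha": "0123456789abcdef",
        "ready": True,
    },
)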
+ args, parser = parse_args() + + if args.test: + CIBuddy(dry_run=True).post_job_error("TEst") + elif args.check_wf_status: + CIBuddy(dry_run=args.dry_run).check_workflow() diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 5229c754d70..85eabb84f9f 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -638,7 +638,14 @@ class CiCache: pushes pending records for all jobs that supposed to be run """ for job, job_config in self.jobs_to_do.items(): - if not job_config.has_digest(): + if ( + job in self.jobs_to_wait + or not job_config.has_digest() + or job_config.disable_await + ): + # 1. "job in self.jobs_to_wait" - this job already has a pending record in cache + # 2. "not job_config.has_digest()" - cache is not used for these jobs + # 3. "job_config.disable_await" - await is explicitly disabled continue pending_state = PendingState(time.time(), run_url=GITHUB_RUN_URL) assert job_config.batches @@ -708,8 +715,9 @@ class CiCache: Filter is to be applied in PRs to remove jobs that are not affected by the change :return: """ - remove_from_to_do = [] + remove_from_workflow = [] required_builds = [] + has_test_jobs_to_skip = False for job_name, job_config in self.jobs_to_do.items(): if CI.is_test_job(job_name) and job_name != CI.JobNames.BUILD_CHECK: if job_config.reference_job_name: @@ -722,24 +730,21 @@ class CiCache: job=reference_name, job_config=reference_config, ): - remove_from_to_do.append(job_name) + remove_from_workflow.append(job_name) + has_test_jobs_to_skip = True else: required_builds += ( job_config.required_builds if job_config.required_builds else [] ) + if has_test_jobs_to_skip: + # If there are tests to skip, it means builds are not affected as well. + # No need to test builds. Let's keep all builds required for test jobs and skip the others + for job_name, job_config in self.jobs_to_do.items(): + if CI.is_build_job(job_name): + if job_name not in required_builds: + remove_from_workflow.append(job_name) - has_builds_to_do = False - for job_name, job_config in self.jobs_to_do.items(): - if CI.is_build_job(job_name): - if job_name not in required_builds: - remove_from_to_do.append(job_name) - else: - has_builds_to_do = True - - if not has_builds_to_do: - remove_from_to_do.append(CI.JobNames.BUILD_CHECK) - - for job in remove_from_to_do: + for job in remove_from_workflow: print(f"Filter job [{job}] - not affected by the change") if job in self.jobs_to_do: del self.jobs_to_do[job] @@ -748,6 +753,19 @@ class CiCache: if job in self.jobs_to_skip: self.jobs_to_skip.remove(job) + # special handling for the special job: BUILD_CHECK + has_builds = False + for job in list(self.jobs_to_do) + self.jobs_to_skip: + if CI.is_build_job(job): + has_builds = True + break + if not has_builds: + if CI.JobNames.BUILD_CHECK in self.jobs_to_do: + print( + f"Filter job [{CI.JobNames.BUILD_CHECK}] - no builds are required in the workflow" + ) + del self.jobs_to_do[CI.JobNames.BUILD_CHECK] + def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None: """ await pending jobs to be finished @@ -762,22 +780,13 @@ class CiCache: # TIMEOUT * MAX_ROUNDS_TO_WAIT must be less than 6h (GH job timeout) with a room for rest RunConfig work TIMEOUT = 3000 # 50 min MAX_ROUNDS_TO_WAIT = 6 - MAX_JOB_NUM_TO_WAIT = 3 round_cnt = 0 - def _has_build_job(): - for job in self.jobs_to_wait: - if CI.is_build_job(job): - return True - return False - if not is_release: # in PRs we can wait only for builds, TIMEOUT*MAX_ROUNDS_TO_WAIT=100min is enough MAX_ROUNDS_TO_WAIT = 2 - while ( - 
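# Minimal sketch of the reworked filtering above: if any test job can be
# skipped via a cache hit, the change is assumed not to affect builds either,
# so only builds still required by the remaining test jobs are kept. Job
# names and the kind/required_builds mapping below are illustrative.
from typing import Dict, List, Set

def filter_unaffected(jobs: Dict[str, dict], skippable_tests: Set[str]) -> List[str]:
    remove: List[str] = []
    required_builds: Set[str] = set()
    for name, cfg in jobs.items():
        if cfg.get("kind") == "test":
            if name in skippable_tests:
                remove.append(name)
            else:
                required_builds.update(cfg.get("required_builds", []))
    if skippable_tests:  # tests are skippable => builds are unaffected too
        for name, cfg in jobs.items():
            if cfg.get("kind") == "build" and name not in required_builds:
                remove.append(name)
    return remove

jobs = {
    "package_release": {"kind": "build"},
    "package_asan": {"kind": "build"},
    "Stateless tests (release)": {"kind": "test", "required_builds": ["package_release"]},
    "Stateless tests (asan)": {"kind": "test", "required_builds": ["package_asan"]},
}
assert filter_unaffected(jobs, {"Stateless tests (asan)"}) == ["Stateless tests (asan)", "package_asan"]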
len(self.jobs_to_wait) > MAX_JOB_NUM_TO_WAIT or _has_build_job() - ) and round_cnt < MAX_ROUNDS_TO_WAIT: + while round_cnt < MAX_ROUNDS_TO_WAIT: round_cnt += 1 GHActions.print_in_group( f"Wait pending jobs, round [{round_cnt}/{MAX_ROUNDS_TO_WAIT}]:", @@ -819,6 +828,10 @@ class CiCache: f"Job [{job_name}_[{batch}/{num_batches}]] is not pending anymore" ) job_config.batches.remove(batch) + if not job_config.batches: + print(f"Remove job [{job_name}] from jobs_to_do") + self.jobs_to_skip.append(job_name) + del self.jobs_to_do[job_name] else: print( f"NOTE: Job [{job_name}:{batch}] finished failed - do not add to ready" @@ -829,9 +842,7 @@ class CiCache: await_finished.add(job_name) for job in await_finished: - self.jobs_to_skip.append(job) del self.jobs_to_wait[job] - del self.jobs_to_do[job] if not dry_run: expired_sec = int(time.time()) - start_at @@ -895,3 +906,87 @@ class CiCache: self.jobs_to_wait[job] = job_config return self + + +if __name__ == "__main__": + # for testing + job_digest = { + "package_release": "bbbd3519d1", + "package_aarch64": "bbbd3519d1", + "package_asan": "bbbd3519d1", + "package_ubsan": "bbbd3519d1", + "package_tsan": "bbbd3519d1", + "package_msan": "bbbd3519d1", + "package_debug": "bbbd3519d1", + "package_release_coverage": "bbbd3519d1", + "binary_release": "bbbd3519d1", + "binary_tidy": "bbbd3519d1", + "binary_darwin": "bbbd3519d1", + "binary_aarch64": "bbbd3519d1", + "binary_aarch64_v80compat": "bbbd3519d1", + "binary_freebsd": "bbbd3519d1", + "binary_darwin_aarch64": "bbbd3519d1", + "binary_ppc64le": "bbbd3519d1", + "binary_amd64_compat": "bbbd3519d1", + "binary_amd64_musl": "bbbd3519d1", + "binary_riscv64": "bbbd3519d1", + "binary_s390x": "bbbd3519d1", + "binary_loongarch64": "bbbd3519d1", + "Builds": "f5dffeecb8", + "Install packages (release)": "ba0c89660e", + "Install packages (aarch64)": "ba0c89660e", + "Stateful tests (asan)": "32a9a1aba9", + "Stateful tests (tsan)": "32a9a1aba9", + "Stateful tests (msan)": "32a9a1aba9", + "Stateful tests (ubsan)": "32a9a1aba9", + "Stateful tests (debug)": "32a9a1aba9", + "Stateful tests (release)": "32a9a1aba9", + "Stateful tests (coverage)": "32a9a1aba9", + "Stateful tests (aarch64)": "32a9a1aba9", + "Stateful tests (release, ParallelReplicas)": "32a9a1aba9", + "Stateful tests (debug, ParallelReplicas)": "32a9a1aba9", + "Stateless tests (asan)": "deb6778b88", + "Stateless tests (tsan)": "deb6778b88", + "Stateless tests (msan)": "deb6778b88", + "Stateless tests (ubsan)": "deb6778b88", + "Stateless tests (debug)": "deb6778b88", + "Stateless tests (release)": "deb6778b88", + "Stateless tests (coverage)": "deb6778b88", + "Stateless tests (aarch64)": "deb6778b88", + "Stateless tests (release, old analyzer, s3, DatabaseReplicated)": "deb6778b88", + "Stateless tests (debug, s3 storage)": "deb6778b88", + "Stateless tests (tsan, s3 storage)": "deb6778b88", + "Stress test (debug)": "aa298abf10", + "Stress test (tsan)": "aa298abf10", + "Upgrade check (debug)": "5ce4d3ee02", + "Integration tests (asan, old analyzer)": "42e58be3aa", + "Integration tests (tsan)": "42e58be3aa", + "Integration tests (aarch64)": "42e58be3aa", + "Integration tests flaky check (asan)": "42e58be3aa", + "Compatibility check (release)": "ecb69d8c4b", + "Compatibility check (aarch64)": "ecb69d8c4b", + "Unit tests (release)": "09d00b702e", + "Unit tests (asan)": "09d00b702e", + "Unit tests (msan)": "09d00b702e", + "Unit tests (tsan)": "09d00b702e", + "Unit tests (ubsan)": "09d00b702e", + "AST fuzzer (debug)": "c38ebf947f", + "AST fuzzer (asan)": "c38ebf947f", 
+ "AST fuzzer (msan)": "c38ebf947f", + "AST fuzzer (tsan)": "c38ebf947f", + "AST fuzzer (ubsan)": "c38ebf947f", + "Stateless tests flaky check (asan)": "deb6778b88", + "Performance Comparison (release)": "a8a7179258", + "ClickBench (release)": "45c07c4aa6", + "ClickBench (aarch64)": "45c07c4aa6", + "Docker server image": "6a24d5b187", + "Docker keeper image": "6a24d5b187", + "Docs check": "4764154c62", + "Fast test": "cb269133f2", + "Style check": "ffffffffff", + "Stateful tests (ubsan, ParallelReplicas)": "32a9a1aba9", + "Stress test (msan)": "aa298abf10", + "Upgrade check (asan)": "5ce4d3ee02", + } + ci_cache = CiCache(job_digests=job_digest, cache_enabled=True, s3=S3Helper()) + ci_cache.update() diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 9b9ddee5326..a44b15f34c1 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -32,6 +32,12 @@ class CI: from ci_definitions import MQ_JOBS as MQ_JOBS from ci_definitions import WorkflowStages as WorkflowStages from ci_definitions import Runners as Runners + from ci_utils import Envs as Envs + from ci_utils import Utils as Utils + from ci_utils import GHActions as GHActions + from ci_definitions import Labels as Labels + from ci_definitions import TRUSTED_CONTRIBUTORS as TRUSTED_CONTRIBUTORS + from ci_utils import CATEGORY_TO_LABEL as CATEGORY_TO_LABEL # Jobs that run for doc related updates _DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK] @@ -45,24 +51,14 @@ class CI: JobNames.INTEGRATION_TEST_ARM, ] ), - Tags.CI_SET_REQUIRED: LabelConfig(run_jobs=REQUIRED_CHECKS), + Tags.CI_SET_REQUIRED: LabelConfig( + run_jobs=REQUIRED_CHECKS + + [build for build in BuildNames if build != BuildNames.FUZZERS] + ), Tags.CI_SET_BUILDS: LabelConfig( run_jobs=[JobNames.STYLE_CHECK, JobNames.BUILD_CHECK] + [build for build in BuildNames if build != BuildNames.FUZZERS] ), - Tags.CI_SET_NON_REQUIRED: LabelConfig( - run_jobs=[job for job in JobNames if job not in REQUIRED_CHECKS] - ), - Tags.CI_SET_OLD_ANALYZER: LabelConfig( - run_jobs=[ - JobNames.STYLE_CHECK, - JobNames.FAST_TEST, - BuildNames.PACKAGE_RELEASE, - BuildNames.PACKAGE_ASAN, - JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE, - JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER, - ] - ), Tags.CI_SET_SYNC: LabelConfig( run_jobs=[ BuildNames.PACKAGE_ASAN, @@ -317,13 +313,13 @@ class CI: required_builds=[BuildNames.PACKAGE_ASAN], num_batches=2 ), JobNames.STATELESS_TEST_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_TSAN], num_batches=2 + required_builds=[BuildNames.PACKAGE_TSAN], num_batches=4 ), JobNames.STATELESS_TEST_MSAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_MSAN], num_batches=3 + required_builds=[BuildNames.PACKAGE_MSAN], num_batches=4 ), JobNames.STATELESS_TEST_UBSAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_UBSAN], num_batches=1 + required_builds=[BuildNames.PACKAGE_UBSAN], num_batches=2 ), JobNames.STATELESS_TEST_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=2 @@ -332,24 +328,24 @@ class CI: required_builds=[BuildNames.PACKAGE_RELEASE], ), JobNames.STATELESS_TEST_RELEASE_COVERAGE: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE_COVERAGE], num_batches=5 + required_builds=[BuildNames.PACKAGE_RELEASE_COVERAGE], num_batches=6 ), JobNames.STATELESS_TEST_AARCH64: CommonJobConfigs.STATELESS_TEST.with_properties( 
required_builds=[BuildNames.PACKAGE_AARCH64], runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=3 + required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 ), JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=2 ), JobNames.STATELESS_TEST_AZURE_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=2, release_only=True + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=3, release_only=True ), JobNames.STATELESS_TEST_S3_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], - num_batches=3, + num_batches=4, ), JobNames.STRESS_TEST_DEBUG: CommonJobConfigs.STRESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], @@ -415,7 +411,8 @@ class CI: JobNames.INTEGRATION_TEST_FLAKY: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True, - reference_job_name=JobNames.INTEGRATION_TEST_TSAN, + # TODO: approach with reference job names does not work because digest may not be calculated if job skipped in wf + # reference_job_name=JobNames.INTEGRATION_TEST_TSAN, ), JobNames.COMPATIBILITY_TEST: CommonJobConfigs.COMPATIBILITY_TEST.with_properties( required_builds=[BuildNames.PACKAGE_RELEASE], @@ -460,7 +457,8 @@ class CI: required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True, timeout=3600, - reference_job_name=JobNames.STATELESS_TEST_RELEASE, + # TODO: approach with reference job names does not work because digest may not be calculated if job skipped in wf + # reference_job_name=JobNames.STATELESS_TEST_RELEASE, ), JobNames.JEPSEN_KEEPER: JobConfig( required_builds=[BuildNames.BINARY_RELEASE], diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index acd9b7fa904..a8d9793f1d3 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -7,6 +7,53 @@ from ci_utils import WithIter from integration_test_images import IMAGES +class Labels: + PR_BUGFIX = "pr-bugfix" + PR_CRITICAL_BUGFIX = "pr-critical-bugfix" + CAN_BE_TESTED = "can be tested" + DO_NOT_TEST = "do not test" + MUST_BACKPORT = "pr-must-backport" + MUST_BACKPORT_CLOUD = "pr-must-backport-cloud" + JEPSEN_TEST = "jepsen-test" + SKIP_MERGEABLE_CHECK = "skip mergeable check" + PR_BACKPORT = "pr-backport" + PR_BACKPORTS_CREATED = "pr-backports-created" + PR_BACKPORTS_CREATED_CLOUD = "pr-backports-created-cloud" + PR_CHERRYPICK = "pr-cherrypick" + PR_CI = "pr-ci" + PR_FEATURE = "pr-feature" + PR_SYNCED_TO_CLOUD = "pr-synced-to-cloud" + PR_SYNC_UPSTREAM = "pr-sync-upstream" + RELEASE = "release" + RELEASE_LTS = "release-lts" + SUBMODULE_CHANGED = "submodule changed" + + # automatic backport for critical bug fixes + AUTO_BACKPORT = {"pr-critical-bugfix"} + + +TRUSTED_CONTRIBUTORS = { + e.lower() + for e in [ + "amosbird", + "azat", # SEMRush + "bharatnc", # Many contributions. 
+ "cwurm", # ClickHouse, Inc + "den-crane", # Documentation contributor + "ildus", # adjust, ex-pgpro + "nvartolomei", # Seasoned contributor, CloudFlare + "taiyang-li", + "ucasFL", # Amos Bird's friend + "thomoco", # ClickHouse, Inc + "tonickkozlov", # Cloudflare + "tylerhannan", # ClickHouse, Inc + "tsolodov", # ClickHouse, Inc + "justindeguzman", # ClickHouse, Inc + "XuJia0210", # ClickHouse, Inc + ] +} + + class WorkflowStages(metaclass=WithIter): """ Stages of GitHUb actions workflow @@ -55,8 +102,6 @@ class Tags(metaclass=WithIter): CI_SET_ARM = "ci_set_arm" CI_SET_REQUIRED = "ci_set_required" CI_SET_BUILDS = "ci_set_builds" - CI_SET_NON_REQUIRED = "ci_set_non_required" - CI_SET_OLD_ANALYZER = "ci_set_old_analyzer" libFuzzer = "libFuzzer" @@ -306,6 +351,8 @@ class JobConfig: run_by_label: str = "" # to run always regardless of the job digest or/and label run_always: bool = False + # disables CI await for a given job + disable_await: bool = False # if the job needs to be run on the release branch, including master (building packages, docker server). # NOTE: Subsequent runs on the same branch with the similar digest are still considered skip-able. required_on_release_branch: bool = False @@ -350,6 +397,7 @@ class CommonJobConfigs: ], ), runner_type=Runners.STYLE_CHECKER_ARM, + disable_await=True, ) COMPATIBILITY_TEST = JobConfig( job_name_keyword="compatibility", @@ -385,7 +433,7 @@ class CommonJobConfigs: ), run_command='functional_test_check.py "$CHECK_NAME"', runner_type=Runners.FUNC_TESTER, - timeout=7200, + timeout=9000, ) STATEFUL_TEST = JobConfig( job_name_keyword="stateful", diff --git a/tests/ci/ci_runners_metrics_lambda/app.py b/tests/ci/ci_runners_metrics_lambda/app.py deleted file mode 100644 index 47161215a97..00000000000 --- a/tests/ci/ci_runners_metrics_lambda/app.py +++ /dev/null @@ -1,164 +0,0 @@ -#!/usr/bin/env python3 -""" -Lambda function to: - - calculate number of running runners - - cleaning dead runners from GitHub - - terminating stale lost runners in EC2 -""" - -import argparse -import sys -from typing import Dict - -import boto3 # type: ignore -from lambda_shared import RUNNER_TYPE_LABELS, RunnerDescriptions, list_runners -from lambda_shared.token import ( - get_access_token_by_key_app, - get_cached_access_token, - get_key_and_app_from_aws, -) - -UNIVERSAL_LABEL = "universal" - - -def handler(event, context): - _ = event - _ = context - main(get_cached_access_token(), True) - - -def group_runners_by_tag( - listed_runners: RunnerDescriptions, -) -> Dict[str, RunnerDescriptions]: - result = {} # type: Dict[str, RunnerDescriptions] - - def add_to_result(tag, runner): - if tag not in result: - result[tag] = [] - result[tag].append(runner) - - for runner in listed_runners: - if UNIVERSAL_LABEL in runner.tags: - # Do not proceed other labels if UNIVERSAL_LABEL is included - add_to_result(UNIVERSAL_LABEL, runner) - continue - - for tag in runner.tags: - if tag in RUNNER_TYPE_LABELS: - add_to_result(tag, runner) - break - else: - add_to_result("unlabeled", runner) - return result - - -def push_metrics_to_cloudwatch( - listed_runners: RunnerDescriptions, group_name: str -) -> None: - client = boto3.client("cloudwatch") - namespace = "RunnersMetrics" - metrics_data = [] - busy_runners = sum( - 1 for runner in listed_runners if runner.busy and not runner.offline - ) - dimensions = [{"Name": "group", "Value": group_name}] - metrics_data.append( - { - "MetricName": "BusyRunners", - "Value": busy_runners, - "Unit": "Count", - "Dimensions": dimensions, - } - ) - 
total_active_runners = sum(1 for runner in listed_runners if not runner.offline) - metrics_data.append( - { - "MetricName": "ActiveRunners", - "Value": total_active_runners, - "Unit": "Count", - "Dimensions": dimensions, - } - ) - total_runners = len(listed_runners) - metrics_data.append( - { - "MetricName": "TotalRunners", - "Value": total_runners, - "Unit": "Count", - "Dimensions": dimensions, - } - ) - if total_active_runners == 0: - busy_ratio = 100.0 - else: - busy_ratio = busy_runners / total_active_runners * 100 - - metrics_data.append( - { - "MetricName": "BusyRunnersRatio", - "Value": busy_ratio, - "Unit": "Percent", - "Dimensions": dimensions, - } - ) - - client.put_metric_data(Namespace=namespace, MetricData=metrics_data) - - -def main( - access_token: str, - push_to_cloudwatch: bool, -) -> None: - gh_runners = list_runners(access_token) - grouped_runners = group_runners_by_tag(gh_runners) - for group, group_runners in grouped_runners.items(): - if push_to_cloudwatch: - print(f"Pushing metrics for group '{group}'") - push_metrics_to_cloudwatch(group_runners, group) - else: - print(group, f"({len(group_runners)})") - for runner in group_runners: - print("\t", runner) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Get list of runners and their states") - parser.add_argument( - "-p", "--private-key-path", help="Path to file with private key" - ) - parser.add_argument("-k", "--private-key", help="Private key") - parser.add_argument( - "-a", "--app-id", type=int, help="GitHub application ID", required=True - ) - parser.add_argument( - "--push-to-cloudwatch", - action="store_true", - help="Push metrics for active and busy runners to cloudwatch", - ) - - args = parser.parse_args() - - if not args.private_key_path and not args.private_key: - print( - "Either --private-key-path or --private-key must be specified", - file=sys.stderr, - ) - - if args.private_key_path and args.private_key: - print( - "Either --private-key-path or --private-key must be specified", - file=sys.stderr, - ) - - if args.private_key: - private_key = args.private_key - elif args.private_key_path: - with open(args.private_key_path, "r", encoding="utf-8") as key_file: - private_key = key_file.read() - else: - print("Attempt to get key and id from AWS secret manager") - private_key, args.app_id = get_key_and_app_from_aws() - - token = get_access_token_by_key_app(private_key, args.app_id) - - main(token, args.push_to_cloudwatch) diff --git a/tests/ci/ci_runners_metrics_lambda/build_and_deploy_archive.sh b/tests/ci/ci_runners_metrics_lambda/build_and_deploy_archive.sh deleted file mode 120000 index 96ba3fa024e..00000000000 --- a/tests/ci/ci_runners_metrics_lambda/build_and_deploy_archive.sh +++ /dev/null @@ -1 +0,0 @@ -../team_keys_lambda/build_and_deploy_archive.sh \ No newline at end of file diff --git a/tests/ci/ci_runners_metrics_lambda/lambda_shared b/tests/ci/ci_runners_metrics_lambda/lambda_shared deleted file mode 120000 index ba86e090f6c..00000000000 --- a/tests/ci/ci_runners_metrics_lambda/lambda_shared +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/ci_runners_metrics_lambda/requirements.txt b/tests/ci/ci_runners_metrics_lambda/requirements.txt deleted file mode 100644 index e2b16067a93..00000000000 --- a/tests/ci/ci_runners_metrics_lambda/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -../lambda_shared_package -../lambda_shared_package[token] diff --git a/tests/ci/ci_settings.py b/tests/ci/ci_settings.py 
index 7b2dd12c310..c29c5777dba 100644 --- a/tests/ci/ci_settings.py +++ b/tests/ci/ci_settings.py @@ -160,7 +160,8 @@ class CiSettings: else: return False - if self.exclude_keywords: + # do not exclude builds + if self.exclude_keywords and not CI.is_build_job(job): for keyword in self.exclude_keywords: if keyword in normalize_string(job): print(f"Job [{job}] matches Exclude keyword [{keyword}] - deny") @@ -168,7 +169,8 @@ class CiSettings: to_deny = False if self.include_keywords: - if job == CI.JobNames.STYLE_CHECK: + # do not exclude builds + if job == CI.JobNames.STYLE_CHECK or CI.is_build_job(job): # never exclude Style Check by include keywords return True for keyword in self.include_keywords: diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index 44bd37fe260..3b12fe6974f 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -1,9 +1,55 @@ +import json import os import re import subprocess +import time from contextlib import contextmanager from pathlib import Path -from typing import Any, Iterator, List, Union, Optional +from typing import Any, Iterator, List, Union, Optional, Sequence, Tuple + +import requests + + +class Envs: + GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") + WORKFLOW_RESULT_FILE = os.getenv( + "WORKFLOW_RESULT_FILE", "/tmp/workflow_results.json" + ) + + +LABEL_CATEGORIES = { + "pr-backward-incompatible": ["Backward Incompatible Change"], + "pr-bugfix": [ + "Bug Fix", + "Bug Fix (user-visible misbehavior in an official stable release)", + "Bug Fix (user-visible misbehaviour in official stable or prestable release)", + "Bug Fix (user-visible misbehavior in official stable or prestable release)", + ], + "pr-critical-bugfix": ["Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)"], + "pr-build": [ + "Build/Testing/Packaging Improvement", + "Build Improvement", + "Build/Testing Improvement", + "Build", + "Packaging Improvement", + ], + "pr-documentation": [ + "Documentation (changelog entry is not required)", + "Documentation", + ], + "pr-feature": ["New Feature"], + "pr-improvement": ["Improvement"], + "pr-not-for-changelog": [ + "Not for changelog (changelog entry is not required)", + "Not for changelog", + ], + "pr-performance": ["Performance Improvement"], + "pr-ci": ["CI Fix or Improvement (changelog entry is not required)"], +} + +CATEGORY_TO_LABEL = { + c: lb for lb, categories in LABEL_CATEGORIES.items() for c in categories +} class WithIter(type): @@ -37,6 +83,44 @@ def normalize_string(string: str) -> str: class GHActions: + class ActionsNames: + RunConfig = "RunConfig" + + class ActionStatuses: + ERROR = "error" + FAILURE = "failure" + PENDING = "pending" + SUCCESS = "success" + + @classmethod + def _get_workflow_results(cls): + if not Path(Envs.WORKFLOW_RESULT_FILE).exists(): + print( + f"ERROR: Failed to get workflow results from file [{Envs.WORKFLOW_RESULT_FILE}]" + ) + return {} + with open(Envs.WORKFLOW_RESULT_FILE, "r", encoding="utf-8") as json_file: + try: + res = json.load(json_file) + except json.JSONDecodeError as e: + print(f"ERROR: json decoder exception {e}") + return {} + return res + + @classmethod + def print_workflow_results(cls): + res = cls._get_workflow_results() + results = [f"{job}: {data['result']}" for job, data in res.items()] + cls.print_in_group("Workflow results", results) + + @classmethod + def get_workflow_job_result(cls, wf_job_name: str) -> Optional[str]: + res = cls._get_workflow_results() + if wf_job_name in res: + return res[wf_job_name]["result"] # type: ignore + else: + 
return None + @staticmethod def print_in_group(group_name: str, lines: Union[Any, List[Any]]) -> None: + lines = list(lines) @@ -45,21 +129,91 @@ class GHActions: print(line) print("::endgroup::") + @staticmethod + def get_commit_status_by_name( + token: str, commit_sha: str, status_name: Union[str, Sequence] + ) -> str: + assert len(token) == 40 + assert len(commit_sha) == 40 + assert is_hex(commit_sha) + assert not is_hex(token) + url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses?per_page={200}" + headers = { + "Authorization": f"token {token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get(url, headers=headers, timeout=5) + + if isinstance(status_name, str): + status_name = (status_name,) + if response.status_code == 200: + assert "next" not in response.links, "Response truncated" + statuses = response.json() + for status in statuses: + if status["context"] in status_name: + return status["state"] # type: ignore + return "" + + @staticmethod + def check_wf_completed(token: str, commit_sha: str) -> bool: + headers = { + "Authorization": f"token {token}", + "Accept": "application/vnd.github.v3+json", + } + url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/check-runs?per_page={100}" + + for i in range(3): + try: + response = requests.get(url, headers=headers, timeout=5) + response.raise_for_status() + # assert "next" not in response.links, "Response truncated" + + data = response.json() + assert data["check_runs"], "?" + + for check in data["check_runs"]: + if check["status"] != "completed": + print( + f" Check workflow status: Check not completed [{check['name']}]" + ) + return False + return True + except Exception as e: + print(f"ERROR: exception after attempt [{i}]: {e}") + time.sleep(1) + + return False + + @staticmethod + def get_pr_url_by_branch(repo, branch): + get_url_cmd = ( + f"gh pr list --repo {repo} --head {branch} --json url --jq '.[0].url'" + ) + url = Shell.run(get_url_cmd) + if not url: + print(f"ERROR: PR not found, branch [{branch}]") + return url + + class Shell: @classmethod def run_strict(cls, command): - subprocess.run( - command + " 2>&1", + res = subprocess.run( + command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True, ) + return res.stdout.strip() @classmethod - def run(cls, command): + def run(cls, command, check=False, dry_run=False): + if dry_run: + print(f"Dry-run. 
Would run command [{command}]") + return "" + print(f"Run command [{command}]") res = "" result = subprocess.run( command, @@ -70,13 +224,26 @@ class Shell: check=False, ) if result.returncode == 0: + print(f"stdout: {result.stdout.strip()}") res = result.stdout + else: + print( + f"ERROR: stdout: {result.stdout.strip()}, stderr: {result.stderr.strip()}" + ) + if check: + assert result.returncode == 0 return res.strip() + @classmethod + def run_as_daemon(cls, command): + print(f"Run daemon command [{command}]") + subprocess.Popen(command.split(" ")) # pylint:disable=consider-using-with + return 0, "" + @classmethod def check(cls, command): result = subprocess.run( - command + " 2>&1", + command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -108,3 +275,81 @@ class Utils: @staticmethod def clear_dmesg(): Shell.run("sudo dmesg --clear ||:") + + @staticmethod + def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]: + """The function checks the body to being properly formatted according to + .github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty, + then there is an error.""" + lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else [])) + lines = [re.sub(r"\s+", " ", line) for line in lines] + + # Check if body contains "Reverts ClickHouse/ClickHouse#36337" + if [ + True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line) + ]: + return "", LABEL_CATEGORIES["pr-not-for-changelog"][0] + + category = "" + entry = "" + description_error = "" + + i = 0 + while i < len(lines): + if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]): + i += 1 + if i >= len(lines): + break + # Can have one empty line between header and the category + # itself. Filter it out. + if not lines[i]: + i += 1 + if i >= len(lines): + break + category = re.sub(r"^[-*\s]*", "", lines[i]) + i += 1 + + # Should not have more than one category. Require empty line + # after the first found category. + if i >= len(lines): + break + if lines[i]: + second_category = re.sub(r"^[-*\s]*", "", lines[i]) + description_error = ( + "More than one changelog category specified: " + f"'{category}', '{second_category}'" + ) + return description_error, category + + elif re.match( + r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] + ): + i += 1 + # Can have one empty line between header and the entry itself. + # Filter it out. + if i < len(lines) and not lines[i]: + i += 1 + # All following lines until empty one are the changelog entry. + entry_lines = [] + while i < len(lines) and lines[i]: + entry_lines.append(lines[i]) + i += 1 + entry = " ".join(entry_lines) + # Don't accept changelog entries like '...'. + entry = re.sub(r"[#>*_.\- ]", "", entry) + # Don't accept changelog entries like 'Close #12345'. + entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry) + else: + i += 1 + + if not category: + description_error = "Changelog category is empty" + # Filter out the PR categories that are not for changelog. 
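# Illustrative aside, not part of the diff: a minimal sketch of how the new
# Utils.check_pr_description() and CATEGORY_TO_LABEL added to ci_utils.py are meant to be
# used together (the validation's elif chain continues right below this aside). The sample
# PR body is hypothetical; the import assumes the script is run from tests/ci.
from ci_utils import CATEGORY_TO_LABEL, Utils

body = (
    "### Changelog category (leave one):\n"
    "- Bug Fix (user-visible misbehavior in an official stable release)\n"
    "\n"
    "### Changelog entry:\n"
    "Fix a rare crash in MergeTree mutations.\n"
)
error, category = Utils.check_pr_description(body, "ClickHouse/ClickHouse")
assert error == ""  # a well-formed description produces no error
assert CATEGORY_TO_LABEL[category] == "pr-bugfix"  # the category maps onto the PR label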
+ elif "(changelog entry is not required)" in category: + pass # to not check the rest of the conditions + elif category not in CATEGORY_TO_LABEL: + description_error, category = f"Category '{category}' is not valid", "" + elif not entry: + description_error = f"Changelog entry required for category '{category}'" + + return description_error, category diff --git a/tests/ci/clean_lost_instances_lambda/app.py b/tests/ci/clean_lost_instances_lambda/app.py deleted file mode 100644 index 4accc14f7ae..00000000000 --- a/tests/ci/clean_lost_instances_lambda/app.py +++ /dev/null @@ -1,336 +0,0 @@ -#!/usr/bin/env python3 -""" -Lambda function to: - - calculate number of running runners - - cleaning dead runners from GitHub - - terminating stale lost runners in EC2 -""" - -import argparse -import sys -from dataclasses import dataclass -from datetime import datetime -from typing import Dict, List - -import boto3 # type: ignore -import requests -from botocore.exceptions import ClientError # type: ignore -from lambda_shared import ( - RUNNER_TYPE_LABELS, - RunnerDescription, - RunnerDescriptions, - list_runners, -) -from lambda_shared.token import ( - get_access_token_by_key_app, - get_cached_access_token, - get_key_and_app_from_aws, -) - -UNIVERSAL_LABEL = "universal" - - -@dataclass -class LostInstance: - counter: int - seen: datetime - - def set_offline(self) -> None: - now = datetime.now() - if now.timestamp() <= self.seen.timestamp() + 120: - # the instance is offline for more than 2 minutes, so we increase - # the counter - self.counter += 1 - else: - self.counter = 1 - self.seen = now - - @property - def recently_offline(self) -> bool: - """Returns True if the instance has been seen less than 5 minutes ago""" - return datetime.now().timestamp() <= self.seen.timestamp() + 300 - - @property - def stable_offline(self) -> bool: - return self.counter >= 3 - - -LOST_INSTANCES = {} # type: Dict["str", LostInstance] - - -def get_dead_runners_in_ec2(runners: RunnerDescriptions) -> RunnerDescriptions: - """Returns instances that are offline/dead in EC2, or not found in EC2""" - ids = { - runner.name: runner - for runner in runners - # Only `i-deadbead123` are valid names for an instance ID - if runner.name.startswith("i-") and runner.offline and not runner.busy - } - if not ids: - return [] - - # Delete all offline runners with wrong name - result_to_delete = [ - runner - for runner in runners - if not ids.get(runner.name) and runner.offline and not runner.busy - ] - - client = boto3.client("ec2") - - i = 0 - inc = 100 - - print("Checking ids: ", " ".join(ids.keys())) - instances_statuses = [] - while i < len(ids.keys()): - try: - instances_statuses.append( - client.describe_instance_status( - InstanceIds=list(ids.keys())[i : i + inc] - ) - ) - # It applied only if all ids exist in EC2 - i += inc - except ClientError as e: - # The list of non-existent instances is in the message: - # The instance IDs 'i-069b1c256c06cf4e3, i-0f26430432b044035, - # i-0faa2ff44edbc147e, i-0eccf2514585045ec, i-0ee4ee53e0daa7d4a, - # i-07928f15acd473bad, i-0eaddda81298f9a85' do not exist - message = e.response["Error"]["Message"] - if message.startswith("The instance IDs '") and message.endswith( - "' do not exist" - ): - non_existent = message[18:-14].split(", ") - for n in non_existent: - result_to_delete.append(ids.pop(n)) - else: - raise - - found_instances = set([]) - print("Response", instances_statuses) - for instances_status in instances_statuses: - for instance_status in instances_status["InstanceStatuses"]: - if 
instance_status["InstanceState"]["Name"] in ("pending", "running"): - found_instances.add(instance_status["InstanceId"]) - - print("Found instances", found_instances) - for runner in result_to_delete: - print("Instance", runner.name, "is not alive, going to remove it") - for instance_id, runner in ids.items(): - if instance_id not in found_instances: - print("Instance", instance_id, "is not found in EC2, going to remove it") - result_to_delete.append(runner) - return result_to_delete - - -def handler(event, context): - _ = event - _ = context - main(get_cached_access_token(), True) - - -def delete_runner(access_token: str, runner: RunnerDescription) -> bool: - headers = { - "Authorization": f"token {access_token}", - "Accept": "application/vnd.github.v3+json", - } - - response = requests.delete( - f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", - headers=headers, - timeout=30, - ) - response.raise_for_status() - print(f"Response code deleting {runner.name} is {response.status_code}") - return bool(response.status_code == 204) - - -def get_lost_ec2_instances(runners: RunnerDescriptions) -> List[str]: - global LOST_INSTANCES - now = datetime.now() - client = boto3.client("ec2") - reservations = client.describe_instances( - Filters=[ - {"Name": "tag-key", "Values": ["github:runner-type"]}, - {"Name": "instance-state-name", "Values": ["pending", "running"]}, - ], - )["Reservations"] - # flatten the reservation into instances - instances = [ - instance - for reservation in reservations - for instance in reservation["Instances"] - ] - offline_runner_names = { - runner.name for runner in runners if runner.offline and not runner.busy - } - runner_names = {runner.name for runner in runners} - - def offline_instance(iid: str) -> None: - if iid in LOST_INSTANCES: - LOST_INSTANCES[iid].set_offline() - return - LOST_INSTANCES[iid] = LostInstance(1, now) - - for instance in instances: - # Do not consider instances started 20 minutes ago as problematic - if now.timestamp() - instance["LaunchTime"].timestamp() < 1200: - continue - - runner_type = [ - tag["Value"] - for tag in instance["Tags"] - if tag["Key"] == "github:runner-type" - ][0] - # If there's no necessary labels in runner type it's fine - if not (UNIVERSAL_LABEL in runner_type or runner_type in RUNNER_TYPE_LABELS): - continue - - if instance["InstanceId"] in offline_runner_names: - offline_instance(instance["InstanceId"]) - continue - - if ( - instance["State"]["Name"] == "running" - and not instance["InstanceId"] in runner_names - ): - offline_instance(instance["InstanceId"]) - - instance_ids = [instance["InstanceId"] for instance in instances] - # clean out long unseen instances - LOST_INSTANCES = { - instance_id: stats - for instance_id, stats in LOST_INSTANCES.items() - if stats.recently_offline and instance_id in instance_ids - } - print("The remained LOST_INSTANCES: ", LOST_INSTANCES) - - return [ - instance_id - for instance_id, stats in LOST_INSTANCES.items() - if stats.stable_offline - ] - - -def continue_lifecycle_hooks(delete_offline_runners: bool) -> None: - """The function to trigger CONTINUE for instances' lifectycle hooks""" - client = boto3.client("ec2") - reservations = client.describe_instances( - Filters=[ - {"Name": "tag-key", "Values": ["github:runner-type"]}, - {"Name": "instance-state-name", "Values": ["shutting-down", "terminated"]}, - ], - )["Reservations"] - # flatten the reservation into instances - terminated_instances = [ - instance["InstanceId"] - for reservation in reservations - for 
instance in reservation["Instances"] - ] - - asg_client = boto3.client("autoscaling") - as_groups = asg_client.describe_auto_scaling_groups( - Filters=[{"Name": "tag-key", "Values": ["github:runner-type"]}] - )["AutoScalingGroups"] - for asg in as_groups: - lifecycle_hooks = [ - lch - for lch in asg_client.describe_lifecycle_hooks( - AutoScalingGroupName=asg["AutoScalingGroupName"] - )["LifecycleHooks"] - if lch["LifecycleTransition"] == "autoscaling:EC2_INSTANCE_TERMINATING" - ] - if not lifecycle_hooks: - continue - for instance in asg["Instances"]: - continue_instance = False - if instance["LifecycleState"] == "Terminating:Wait": - if instance["HealthStatus"] == "Unhealthy": - print(f"The instance {instance['InstanceId']} is Unhealthy") - continue_instance = True - elif ( - instance["HealthStatus"] == "Healthy" - and instance["InstanceId"] in terminated_instances - ): - print( - f"The instance {instance['InstanceId']} is already terminated" - ) - continue_instance = True - if continue_instance: - if delete_offline_runners: - for lch in lifecycle_hooks: - print(f"Continue lifecycle hook {lch['LifecycleHookName']}") - asg_client.complete_lifecycle_action( - LifecycleHookName=lch["LifecycleHookName"], - AutoScalingGroupName=asg["AutoScalingGroupName"], - LifecycleActionResult="CONTINUE", - InstanceId=instance["InstanceId"], - ) - - -def main( - access_token: str, - delete_offline_runners: bool, -) -> None: - gh_runners = list_runners(access_token) - - dead_runners = get_dead_runners_in_ec2(gh_runners) - print("Runners in GH API to terminate: ", [runner.name for runner in dead_runners]) - if delete_offline_runners and dead_runners: - print("Going to delete offline runners") - for runner in dead_runners: - print("Deleting runner", runner) - delete_runner(access_token, runner) - elif dead_runners: - print("Would delete dead runners: ", dead_runners) - - lost_instances = get_lost_ec2_instances(gh_runners) - print("Instances to terminate: ", lost_instances) - if delete_offline_runners: - if lost_instances: - print("Going to terminate lost instances") - boto3.client("ec2").terminate_instances(InstanceIds=lost_instances) - - continue_lifecycle_hooks(delete_offline_runners) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Get list of runners and their states") - parser.add_argument( - "-p", "--private-key-path", help="Path to file with private key" - ) - parser.add_argument("-k", "--private-key", help="Private key") - parser.add_argument( - "-a", "--app-id", type=int, help="GitHub application ID", required=True - ) - parser.add_argument( - "--delete-offline", action="store_true", help="Remove offline runners" - ) - - args = parser.parse_args() - - if not args.private_key_path and not args.private_key: - print( - "Either --private-key-path or --private-key must be specified", - file=sys.stderr, - ) - - if args.private_key_path and args.private_key: - print( - "Either --private-key-path or --private-key must be specified", - file=sys.stderr, - ) - - if args.private_key: - private_key = args.private_key - elif args.private_key_path: - with open(args.private_key_path, "r", encoding="utf-8") as key_file: - private_key = key_file.read() - else: - print("Attempt to get key and id from AWS secret manager") - private_key, args.app_id = get_key_and_app_from_aws() - - token = get_access_token_by_key_app(private_key, args.app_id) - - main(token, args.delete_offline) diff --git a/tests/ci/clean_lost_instances_lambda/build_and_deploy_archive.sh 
b/tests/ci/clean_lost_instances_lambda/build_and_deploy_archive.sh deleted file mode 120000 index 96ba3fa024e..00000000000 --- a/tests/ci/clean_lost_instances_lambda/build_and_deploy_archive.sh +++ /dev/null @@ -1 +0,0 @@ -../team_keys_lambda/build_and_deploy_archive.sh \ No newline at end of file diff --git a/tests/ci/clean_lost_instances_lambda/lambda_shared b/tests/ci/clean_lost_instances_lambda/lambda_shared deleted file mode 120000 index ba86e090f6c..00000000000 --- a/tests/ci/clean_lost_instances_lambda/lambda_shared +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/clean_lost_instances_lambda/requirements.txt b/tests/ci/clean_lost_instances_lambda/requirements.txt deleted file mode 100644 index e2b16067a93..00000000000 --- a/tests/ci/clean_lost_instances_lambda/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -../lambda_shared_package -../lambda_shared_package[token] diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 91ea5c6d5d3..287970cce9a 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -197,6 +197,10 @@ def get_instance_id(): return _query_imds("latest/meta-data/instance-id") +def get_instance_lifecycle(): + return _query_imds("latest/meta-data/instance-life-cycle") + + def prepare_tests_results_for_clickhouse( pr_info: PRInfo, test_results: TestResults, @@ -233,7 +237,7 @@ def prepare_tests_results_for_clickhouse( "head_ref": head_ref, "head_repo": head_repo, "task_url": pr_info.task_url, - "instance_type": get_instance_type(), + "instance_type": ",".join([get_instance_type(), get_instance_lifecycle()]), "instance_id": get_instance_id(), } diff --git a/tests/ci/compress_files.py b/tests/ci/compress_files.py index 70c0aca6a36..f49c872b5a0 100644 --- a/tests/ci/compress_files.py +++ b/tests/ci/compress_files.py @@ -58,14 +58,14 @@ def decompress_fast(archive_path: Path, result_path: Optional[Path] = None) -> N archive_path, result_path, ) - program_part = "--use-compress-program='zstd --threads=0'" + program_part = "--use-compress-program='zstd --threads=0 -d'" elif PIGZ.exists(): logging.info( "pigz found, will compress and decompress faster ('%s' -> '%s')", archive_path, result_path, ) - program_part = "--use-compress-program='pigz'" + program_part = "--use-compress-program='pigz -d'" else: program_part = "-z" logging.info( diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index 7f4cf8c787a..a0b4083b673 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -2,7 +2,6 @@ import argparse import dataclasses import json import os -import subprocess from contextlib import contextmanager from copy import copy @@ -13,7 +12,8 @@ from git_helper import Git, GIT_PREFIX from ssh import SSHAgent from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET from s3_helper import S3Helper -from autoscale_runners_lambda.lambda_shared.pr import Labels +from ci_utils import Shell, GHActions +from ci_buddy import CIBuddy from version_helper import ( FILE_WITH_VERSION_PATH, GENERATED_CONTRIBUTORS, @@ -27,34 +27,66 @@ from ci_config import CI CMAKE_PATH = get_abs_path(FILE_WITH_VERSION_PATH) CONTRIBUTORS_PATH = get_abs_path(GENERATED_CONTRIBUTORS) +RELEASE_INFO_FILE = "/tmp/release_info.json" -class ShellRunner: +class ReleaseProgress: + STARTED = "started" + DOWNLOAD_PACKAGES = "download packages" + PUSH_RELEASE_TAG = "push release tag" + PUSH_NEW_RELEASE_BRANCH = "push new release branch" + BUMP_VERSION = "bump version" + 
CREATE_GH_RELEASE = "create GH release" + EXPORT_TGZ = "export TGZ packages" + EXPORT_RPM = "export RPM packages" + EXPORT_DEB = "export DEB packages" + TEST_TGZ = "test TGZ packages" + TEST_RPM = "test RPM packages" + TEST_DEB = "test DEB packages" + COMPLETED = "completed" - @classmethod - def run( - cls, command, check_retcode=True, print_output=True, async_=False, dry_run=False - ): - if dry_run: - print(f"Dry-run: Would run shell command: [{command}]") - return 0, "" - print(f"Running shell command: [{command}]") - if async_: - subprocess.Popen(command.split(" ")) # pylint:disable=consider-using-with - return 0, "" - result = subprocess.run( - command + " 2>&1", - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - check=True, - ) - if print_output: - print(result.stdout) - if check_retcode: - assert result.returncode == 0, f"Return code [{result.returncode}]" - return result.returncode, result.stdout + +class ReleaseProgressDescription: + OK = "OK" + FAILED = "FAILED" + + +class ReleaseContextManager: + def __init__(self, release_progress): + self.release_progress = release_progress + self.release_info = None + + def __enter__(self): + if self.release_progress == ReleaseProgress.STARTED: + # create initial release info + self.release_info = ReleaseInfo( + release_branch="NA", + commit_sha=args.ref, + release_tag="NA", + version="NA", + codename="NA", + previous_release_tag="NA", + previous_release_sha="NA", + release_progress=ReleaseProgress.STARTED, + ).dump() + else: + # fetch release info from fs and update + self.release_info = ReleaseInfo.from_file() + assert self.release_info + assert ( + self.release_info.progress_description == ReleaseProgressDescription.OK + ), "Must be OK on the start of new context" + self.release_info.release_progress = self.release_progress + self.release_info.dump() + return self.release_info + + def __exit__(self, exc_type, exc_value, traceback): + assert self.release_info + if exc_type is not None: + self.release_info.progress_description = ReleaseProgressDescription.FAILED + else: + self.release_info.progress_description = ReleaseProgressDescription.OK + self.release_info.dump() @dataclasses.dataclass @@ -65,29 +97,52 @@ class ReleaseInfo: commit_sha: str # lts or stable codename: str + previous_release_tag: str + previous_release_sha: str + changelog_pr: str = "" + version_bump_pr: str = "" + release_url: str = "" + debian_command: str = "" + rpm_command: str = "" + tgz_command: str = "" + docker_command: str = "" + release_progress: str = "" + progress_description: str = "" + + def is_patch(self): + return self.release_branch != "master" + + def is_new_release_branch(self): + return self.release_branch == "master" @staticmethod - def from_file(file_path: str) -> "ReleaseInfo": - with open(file_path, "r", encoding="utf-8") as json_file: + def from_file() -> "ReleaseInfo": + with open(RELEASE_INFO_FILE, "r", encoding="utf-8") as json_file: res = json.load(json_file) return ReleaseInfo(**res) - @staticmethod - def prepare(commit_ref: str, release_type: str, outfile: str) -> None: - Path(outfile).parent.mkdir(parents=True, exist_ok=True) - Path(outfile).unlink(missing_ok=True) + def dump(self): + print(f"Dump release info into [{RELEASE_INFO_FILE}]") + with open(RELEASE_INFO_FILE, "w", encoding="utf-8") as f: + print(json.dumps(dataclasses.asdict(self), indent=2), file=f) + return self + + def prepare(self, commit_ref: str, release_type: str) -> "ReleaseInfo": version = None release_branch = None release_tag = None - codename 
= None + previous_release_tag = None + previous_release_sha = None + codename = "" assert release_type in ("patch", "new") if release_type == "new": # check commit_ref is right and on a right branch - ShellRunner.run( - f"git merge-base --is-ancestor origin/{commit_ref} origin/master" + Shell.run( + f"git merge-base --is-ancestor {commit_ref} origin/master", + check=True, ) with checkout(commit_ref): - _, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}") + commit_sha = Shell.run(f"git rev-parse {commit_ref}", check=True) # Git() must be inside "with checkout" contextmanager git = Git() version = get_version_from_repo(git=git) @@ -98,12 +153,14 @@ class ReleaseInfo: git.latest_tag == expected_prev_tag ), f"BUG: latest tag [{git.latest_tag}], expected [{expected_prev_tag}]" release_tag = version.describe - codename = ( - VersionType.STABLE - ) # dummy value (artifactory won't be updated for new release) + previous_release_tag = expected_prev_tag + previous_release_sha = Shell.run_strict( + f"git rev-parse {previous_release_tag}" + ) + assert previous_release_sha if release_type == "patch": with checkout(commit_ref): - _, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}") + commit_sha = Shell.run(f"git rev-parse {commit_ref}", check=True) # Git() must be inside "with checkout" contextmanager git = Git() version = get_version_from_repo(git=git) @@ -111,16 +168,18 @@ class ReleaseInfo: version.with_description(codename) release_branch = f"{version.major}.{version.minor}" release_tag = version.describe - ShellRunner.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags") + Shell.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags", check=True) # check commit is right and on a right branch - ShellRunner.run( - f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}" + Shell.run( + f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}", + check=True, ) if version.patch == 1: expected_version = copy(version) + previous_release_tag = f"v{version.major}.{version.minor}.1.1-new" expected_version.bump() expected_tag_prefix = ( - f"v{expected_version.major}.{expected_version.minor}-" + f"v{expected_version.major}.{expected_version.minor}." ) expected_tag_suffix = "-new" else: @@ -128,6 +187,7 @@ class ReleaseInfo: f"v{version.major}.{version.minor}.{version.patch-1}." 
) expected_tag_suffix = f"-{version.get_stable_release_type()}" + previous_release_tag = git.latest_tag if git.latest_tag.startswith( expected_tag_prefix ) and git.latest_tag.endswith(expected_tag_suffix): @@ -137,27 +197,36 @@ class ReleaseInfo: False ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]" + previous_release_sha = Shell.run_strict( + f"git rev-parse {previous_release_tag}" + ) + assert previous_release_sha + assert ( release_branch + and previous_release_tag + and previous_release_sha and commit_sha and release_tag and version - and codename in ("lts", "stable") + and (codename in ("lts", "stable") or release_type == "new") ) - res = ReleaseInfo( - release_branch=release_branch, - commit_sha=commit_sha, - release_tag=release_tag, - version=version.string, - codename=codename, - ) - with open(outfile, "w", encoding="utf-8") as f: - print(json.dumps(dataclasses.asdict(res), indent=2), file=f) + + self.release_branch = release_branch + self.commit_sha = commit_sha + self.release_tag = release_tag + self.version = version.string + self.codename = codename + self.previous_release_tag = previous_release_tag + self.previous_release_sha = previous_release_sha + self.release_progress = ReleaseProgress.STARTED + self.progress_description = ReleaseProgressDescription.OK + return self def push_release_tag(self, dry_run: bool) -> None: if dry_run: # remove locally created tag from prev run - ShellRunner.run( + Shell.run( f"{GIT_PREFIX} tag -l | grep -q {self.release_tag} && git tag -d {self.release_tag} ||:" ) # Create release tag @@ -165,16 +234,17 @@ class ReleaseInfo: f"Create and push release tag [{self.release_tag}], commit [{self.commit_sha}]" ) tag_message = f"Release {self.release_tag}" - ShellRunner.run( - f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}" + Shell.run( + f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}", + check=True, ) cmd_push_tag = f"{GIT_PREFIX} push origin {self.release_tag}:{self.release_tag}" - ShellRunner.run(cmd_push_tag, dry_run=dry_run) + Shell.run(cmd_push_tag, dry_run=dry_run, check=True) @staticmethod def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None: cmd = f"gh api repos/{GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}" - ShellRunner.run(cmd, dry_run=dry_run) + Shell.run(cmd, dry_run=dry_run, check=True) def push_new_release_branch(self, dry_run: bool) -> None: assert ( @@ -191,21 +261,21 @@ class ReleaseInfo: ), f"Unexpected current version in git, must precede [{self.version}] by one step, actual [{version.string}]" if dry_run: # remove locally created branch from prev run - ShellRunner.run( - f"{GIT_PREFIX} branch -l | grep -q {new_release_branch} && git branch -d {new_release_branch} ||:" + Shell.run( + f"{GIT_PREFIX} branch -l | grep -q {new_release_branch} && git branch -d {new_release_branch}" ) print( f"Create and push new release branch [{new_release_branch}], commit [{self.commit_sha}]" ) with checkout(self.release_branch): with checkout_new(new_release_branch): - pr_labels = f"--label {Labels.RELEASE}" + pr_labels = f"--label {CI.Labels.RELEASE}" if stable_release_type == VersionType.LTS: - pr_labels += f" --label {Labels.RELEASE_LTS}" + pr_labels += f" --label {CI.Labels.RELEASE_LTS}" cmd_push_branch = ( f"{GIT_PREFIX} push --set-upstream origin {new_release_branch}" ) - ShellRunner.run(cmd_push_branch, dry_run=dry_run) + Shell.run(cmd_push_branch, dry_run=dry_run, check=True) print("Create 
and push backport tags for new release branch") ReleaseInfo._create_gh_label( @@ -214,12 +284,13 @@ class ReleaseInfo: ReleaseInfo._create_gh_label( f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run ) - ShellRunner.run( + Shell.run( f"""gh pr create --repo {GITHUB_REPOSITORY} --title 'Release pull request for branch {new_release_branch}' --head {new_release_branch} {pr_labels} --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.' """, dry_run=dry_run, + check=True, ) def update_version_and_contributors_list(self, dry_run: bool) -> None: @@ -245,32 +316,55 @@ class ReleaseInfo: body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md") actor = os.getenv("GITHUB_ACTOR", "") or "me" cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file '{body_file} --label 'do not test' --assignee @{actor}" - ShellRunner.run(cmd_commit_version_upd, dry_run=dry_run) - ShellRunner.run(cmd_push_branch, dry_run=dry_run) - ShellRunner.run(cmd_create_pr, dry_run=dry_run) + Shell.run(cmd_commit_version_upd, check=True, dry_run=dry_run) + Shell.run(cmd_push_branch, check=True, dry_run=dry_run) + Shell.run(cmd_create_pr, check=True, dry_run=dry_run) if dry_run: - ShellRunner.run( - f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" - ) - ShellRunner.run( + Shell.run(f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'") + Shell.run( f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" ) + self.version_bump_pr = "dry-run" + else: + self.version_bump_pr = GHActions.get_pr_url_by_branch( + repo=GITHUB_REPOSITORY, branch=branch_upd_version_contributors + ) + + def update_release_info(self, dry_run: bool) -> "ReleaseInfo": + if self.release_branch != "master": + branch = f"auto/{release_info.release_tag}" + if not dry_run: + url = GHActions.get_pr_url_by_branch( + repo=GITHUB_REPOSITORY, branch=branch + ) + else: + url = "dry-run" + print(f"ChangeLog PR url [{url}]") + self.changelog_pr = url + print(f"Release url [{url}]") + self.release_url = f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" + if self.release_progress == ReleaseProgress.COMPLETED: + self.docker_command = f"docker run --rm clickhouse/clickhouse:{self.version} clickhouse --version" + self.dump() + return self def create_gh_release(self, packages_files: List[str], dry_run: bool) -> None: repo = os.getenv("GITHUB_REPOSITORY") assert repo - cmds = [] - cmds.append( + cmds = [ f"gh release create --repo {repo} --title 'Release {self.release_tag}' {self.release_tag}" - ) + ] for file in packages_files: cmds.append(f"gh release upload {self.release_tag} {file}") if not dry_run: for cmd in cmds: - ShellRunner.run(cmd) + Shell.run(cmd, check=True) + self.release_url = f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" else: print("Dry-run, would run commands:") print("\n * ".join(cmds)) + self.release_url = f"dry-run" + self.dump() class RepoTypes: @@ -330,7 +424,7 @@ class PackageDownloader: self.macos_package_files = ["clickhouse-macos", "clickhouse-macos-aarch64"] self.file_to_type = {} - ShellRunner.run(f"mkdir -p {self.LOCAL_DIR}") + Shell.run(f"mkdir -p {self.LOCAL_DIR}") for package_type in self.PACKAGE_TYPES: for package in self.package_names: @@ -380,7 +474,7 @@ class PackageDownloader: return res def run(self): - ShellRunner.run(f"rm -rf {self.LOCAL_DIR}/*") + Shell.run(f"rm 
-rf {self.LOCAL_DIR}/*") for package_file in ( self.deb_package_files + self.rpm_package_files + self.tgz_package_files ): @@ -453,6 +547,37 @@ class PackageDownloader: return True +@contextmanager +def checkout(ref: str) -> Iterator[None]: + orig_ref = Shell.run(f"{GIT_PREFIX} symbolic-ref --short HEAD", check=True) + rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" + assert orig_ref + if ref not in (orig_ref,): + Shell.run(f"{GIT_PREFIX} checkout {ref}") + try: + yield + except (Exception, KeyboardInterrupt) as e: + print(f"ERROR: Exception [{e}]") + Shell.run(rollback_cmd) + raise + Shell.run(rollback_cmd) + + +@contextmanager +def checkout_new(ref: str) -> Iterator[None]: + orig_ref = Shell.run(f"{GIT_PREFIX} symbolic-ref --short HEAD", check=True) + rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" + assert orig_ref + Shell.run(f"{GIT_PREFIX} checkout -b {ref}", check=True) + try: + yield + except (Exception, KeyboardInterrupt) as e: + print(f"ERROR: Exception [{e}]") + Shell.run(rollback_cmd) + raise + Shell.run(rollback_cmd) + + def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, @@ -488,6 +613,11 @@ def parse_args() -> argparse.Namespace: action="store_true", help="Create GH Release object and attach all packages", ) + parser.add_argument( + "--post-status", + action="store_true", + help="Post release status into Slack", + ) parser.add_argument( "--ref", type=str, @@ -506,55 +636,25 @@ def parse_args() -> argparse.Namespace: help="do not make any actual changes in the repo, just show what will be done", ) parser.add_argument( - "--outfile", - default="", - type=str, - help="output file to write json result to, if not set - stdout", + "--set-progress-started", + action="store_true", + help="Set new progress step, --progress must be set", ) parser.add_argument( - "--infile", - default="", + "--progress", type=str, - help="input file with release info", + help="Progress step name, see @ReleaseProgress", + ) + parser.add_argument( + "--set-progress-completed", + action="store_true", + help="Set current progress step to OK (completed)", ) - return parser.parse_args() -@contextmanager -def checkout(ref: str) -> Iterator[None]: - _, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") - rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" - assert orig_ref - if ref not in (orig_ref,): - ShellRunner.run(f"{GIT_PREFIX} checkout {ref}") - try: - yield - except (Exception, KeyboardInterrupt) as e: - print(f"ERROR: Exception [{e}]") - ShellRunner.run(rollback_cmd) - raise - ShellRunner.run(rollback_cmd) - - -@contextmanager -def checkout_new(ref: str) -> Iterator[None]: - _, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") - rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" - assert orig_ref - ShellRunner.run(f"{GIT_PREFIX} checkout -b {ref}") - try: - yield - except (Exception, KeyboardInterrupt) as e: - print(f"ERROR: Exception [{e}]") - ShellRunner.run(rollback_cmd) - raise - ShellRunner.run(rollback_cmd) - - if __name__ == "__main__": args = parse_args() - assert args.dry_run # prepare ssh for git if needed _ssh_agent = None @@ -566,43 +666,91 @@ if __name__ == "__main__": _ssh_agent.print_keys() if args.prepare_release_info: - assert ( - args.ref and args.release_type and args.outfile - ), "--ref, --release-type and --outfile must be provided with --prepare-release-info" - ReleaseInfo.prepare( - commit_ref=args.ref, release_type=args.release_type, outfile=args.outfile - ) 
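# Illustrative aside, not part of the diff: the checkout()/checkout_new() helpers above
# restore the original HEAD both on success and on any exception (the rollback command is
# re-run after the try block). A minimal usage sketch; the branch names are made up and
# the import assumes the tests/ci layout.
from create_release import checkout, checkout_new

with checkout("origin/24.3"):  # switch to an existing release branch
    with checkout_new("auto/demo-version-bump"):  # create and switch to a scratch branch
        pass  # commit a version bump, push, open a PR, ...
# HEAD is back on the branch we started from at this point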
- if args.push_release_tag: - assert args.infile, "--infile must be provided" - release_info = ReleaseInfo.from_file(args.infile) - release_info.push_release_tag(dry_run=args.dry_run) - if args.push_new_release_branch: - assert args.infile, "--infile must be provided" - release_info = ReleaseInfo.from_file(args.infile) - release_info.push_new_release_branch(dry_run=args.dry_run) - if args.create_bump_version_pr: - # TODO: store link to PR in release info - assert args.infile, "--infile must be provided" - release_info = ReleaseInfo.from_file(args.infile) - release_info.update_version_and_contributors_list(dry_run=args.dry_run) + with ReleaseContextManager( + release_progress=ReleaseProgress.STARTED + ) as release_info: + assert ( + args.ref and args.release_type + ), "--ref and --release-type must be provided with --prepare-release-info" + release_info.prepare(commit_ref=args.ref, release_type=args.release_type) + if args.download_packages: - assert args.infile, "--infile must be provided" - release_info = ReleaseInfo.from_file(args.infile) - p = PackageDownloader( - release=release_info.release_branch, - commit_sha=release_info.commit_sha, - version=release_info.version, - ) - p.run() + with ReleaseContextManager( + release_progress=ReleaseProgress.DOWNLOAD_PACKAGES + ) as release_info: + p = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + p.run() + + if args.push_release_tag: + with ReleaseContextManager( + release_progress=ReleaseProgress.PUSH_RELEASE_TAG + ) as release_info: + release_info.push_release_tag(dry_run=args.dry_run) + + if args.push_new_release_branch: + with ReleaseContextManager( + release_progress=ReleaseProgress.PUSH_NEW_RELEASE_BRANCH + ) as release_info: + release_info.push_new_release_branch(dry_run=args.dry_run) + + if args.create_bump_version_pr: + with ReleaseContextManager( + release_progress=ReleaseProgress.BUMP_VERSION + ) as release_info: + release_info.update_version_and_contributors_list(dry_run=args.dry_run) + if args.create_gh_release: - assert args.infile, "--infile must be provided" - release_info = ReleaseInfo.from_file(args.infile) - p = PackageDownloader( - release=release_info.release_branch, - commit_sha=release_info.commit_sha, - version=release_info.version, - ) - release_info.create_gh_release(p.get_all_packages_files(), args.dry_run) + with ReleaseContextManager( + release_progress=ReleaseProgress.CREATE_GH_RELEASE + ) as release_info: + p = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + release_info.create_gh_release( + packages_files=p.get_all_packages_files(), dry_run=args.dry_run + ) + + if args.post_status: + release_info = ReleaseInfo.from_file() + release_info.update_release_info(dry_run=args.dry_run) + if release_info.is_new_release_branch(): + title = "New release branch" + else: + title = "New release" + if ( + release_info.progress_description == ReleaseProgressDescription.OK + and release_info.release_progress == ReleaseProgress.COMPLETED + ): + title = "Completed: " + title + CIBuddy(dry_run=args.dry_run).post_done( + title, dataclasses.asdict(release_info) + ) + else: + title = "Failed: " + title + CIBuddy(dry_run=args.dry_run).post_critical( + title, dataclasses.asdict(release_info) + ) + + if args.set_progress_started: + ri = ReleaseInfo.from_file() + ri.release_progress = args.progress + ri.progress_description = ReleaseProgressDescription.FAILED + ri.dump() 
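# Illustrative aside, not part of the diff: the --set-progress-started handler around this
# point records the step as FAILED up front, and --set-progress-completed later flips it to
# OK, so a step that dies in between leaves a FAILED marker in /tmp/release_info.json. The
# ReleaseContextManager used in the rewired __main__ above gives the same bracketing for
# in-process steps; a sketch, assuming an existing release_info.json and tests/ci imports:
from create_release import ReleaseContextManager, ReleaseProgress

with ReleaseContextManager(release_progress=ReleaseProgress.EXPORT_TGZ) as info:
    # a clean exit rewrites progress_description to OK; an exception leaves it as FAILED
    print(f"exporting packages for {info.release_tag}")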
+ assert args.progress, "Progress step name must be provided" + + if args.set_progress_completed: + ri = ReleaseInfo.from_file() + assert ( + ri.progress_description == ReleaseProgressDescription.FAILED + ), "Must be FAILED before set to OK" + ri.progress_description = ReleaseProgressDescription.OK + ri.dump() # tear down ssh if _ssh_agent and _key_pub: @@ -618,6 +766,8 @@ sudo apt install --yes --no-install-recommends python3-dev python3-pip gh unzip sudo apt install --yes python3-boto3 sudo apt install --yes python3-github sudo apt install --yes python3-unidiff +sudo apt install --yes python3-tqdm # cloud changelog +sudo apt install --yes python3-thefuzz # cloud changelog sudo apt install --yes s3fs ### INSTALL AWS CLI diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index ef9f4dc016e..66db082677f 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -17,13 +17,35 @@ from download_release_packages import download_last_release from env_helper import REPO_COPY, REPORT_PATH, TEMP_PATH from get_robot_token import get_parameter_from_ssm from pr_info import PRInfo -from report import ERROR, SUCCESS, JobReport, StatusType, TestResults, read_test_results +from report import ( + ERROR, + SUCCESS, + JobReport, + StatusType, + TestResults, + read_test_results, + FAILURE, +) from stopwatch import Stopwatch from tee_popen import TeePopen +from ci_config import CI +from ci_utils import Utils NO_CHANGES_MSG = "Nothing to run" +class SensitiveFormatter(logging.Formatter): + @staticmethod + def _filter(s): + return re.sub( + r"(.*)(AZURE_CONNECTION_STRING.*\')(.*)", r"\1AZURE_CONNECTION_STRING\3", s + ) + + def format(self, record): + original = logging.Formatter.format(self, record) + return self._filter(original) + + def get_additional_envs( check_name: str, run_by_hash_num: int, run_by_hash_total: int ) -> List[str]: @@ -90,8 +112,8 @@ def get_run_command( ] if flaky_check: - envs.append("-e NUM_TRIES=100") - envs.append("-e MAX_RUN_TIME=1800") + envs.append("-e NUM_TRIES=50") + envs.append("-e MAX_RUN_TIME=2800") envs += [f"-e {e}" for e in additional_envs] @@ -213,6 +235,9 @@ def parse_args(): def main(): logging.basicConfig(level=logging.INFO) + for handler in logging.root.handlers: + # pylint: disable=protected-access + handler.setFormatter(SensitiveFormatter(handler.formatter._fmt)) # type: ignore stopwatch = Stopwatch() @@ -336,7 +361,23 @@ def main(): additional_files=additional_logs, ).dump(to_file=args.report_to_file if args.report_to_file else None) + should_block_ci = False if state != SUCCESS: + should_block_ci = True + + if state == FAILURE and CI.is_required(check_name): + failed_cnt = Utils.get_failed_tests_number(description) + print( + f"Job status is [{state}] with [{failed_cnt}] failed test cases. 
status description [{description}]" + ) + if ( + failed_cnt + and failed_cnt <= CI.MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI + ): + print(f"Won't block the CI workflow") + should_block_ci = False + + if should_block_ci: sys.exit(1) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 80ac1935d95..6245f0490fc 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -23,10 +23,13 @@ from report import ( TestResult, TestResults, read_test_results, + FAILURE, ) from stopwatch import Stopwatch import integration_tests_runner as runner +from ci_config import CI +from ci_utils import Utils def get_json_params_dict( @@ -233,7 +236,23 @@ def main(): additional_files=additional_logs, ).dump(to_file=args.report_to_file if args.report_to_file else None) + should_block_ci = False if state != SUCCESS: + should_block_ci = True + + if state == FAILURE and CI.is_required(check_name): + failed_cnt = Utils.get_failed_tests_number(description) + print( + f"Job status is [{state}] with [{failed_cnt}] failed test cases. status description [{description}]" + ) + if ( + failed_cnt + and failed_cnt <= CI.MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI + ): + print(f"Won't block the CI workflow") + should_block_ci = False + + if should_block_ci: sys.exit(1) diff --git a/tests/ci/lambda_shared_package/.gitignore b/tests/ci/lambda_shared_package/.gitignore deleted file mode 100644 index 59d52651e06..00000000000 --- a/tests/ci/lambda_shared_package/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -build -*.egg-info diff --git a/tests/ci/lambda_shared_package/lambda_shared/__init__.py b/tests/ci/lambda_shared_package/lambda_shared/__init__.py deleted file mode 100644 index 8b53f9dcb23..00000000000 --- a/tests/ci/lambda_shared_package/lambda_shared/__init__.py +++ /dev/null @@ -1,237 +0,0 @@ -"""The shared code and types for all our CI lambdas -It exists as __init__.py and lambda_shared/__init__.py to work both in local and venv""" - -import json -import logging -import time -from collections import namedtuple -from typing import Any, Dict, Iterable, List, Optional - -import boto3 # type: ignore -import requests - -RUNNER_TYPE_LABELS = [ - "builder", - "func-tester", - "func-tester-aarch64", - "fuzzer-unit-tester", - "limited-tester", - "stress-tester", - "style-checker", - "style-checker-aarch64", - # private runners - "private-builder", - "private-clickpipes", - "private-func-tester", - "private-fuzzer-unit-tester", - "private-stress-tester", - "private-style-checker", -] - - -### VENDORING -def get_parameter_from_ssm( - name: str, decrypt: bool = True, client: Optional[Any] = None -) -> str: - if not client: - client = boto3.client("ssm", region_name="us-east-1") - return client.get_parameter(Name=name, WithDecryption=decrypt)[ # type: ignore - "Parameter" - ]["Value"] - - -class CHException(Exception): - pass - - -class InsertException(CHException): - pass - - -class ClickHouseHelper: - def __init__( - self, - url: str, - user: Optional[str] = None, - password: Optional[str] = None, - ): - self.url = url - self.auth = {} - if user: - self.auth["X-ClickHouse-User"] = user - if password: - self.auth["X-ClickHouse-Key"] = password - - @staticmethod - def _insert_json_str_info_impl( - url: str, auth: Dict[str, str], db: str, table: str, json_str: str - ) -> None: - params = { - "database": db, - "query": f"INSERT INTO {table} FORMAT JSONEachRow", - "date_time_input_format": "best_effort", - "send_logs_level": "warning", - } - - for i in range(5): - try: 
- response = requests.post( - url, params=params, data=json_str, headers=auth - ) - except Exception as e: - error = f"Received exception while sending data to {url} on {i} attempt: {e}" - logging.warning(error) - continue - - logging.info("Response content '%s'", response.content) - - if response.ok: - break - - error = ( - "Cannot insert data into clickhouse at try " - + str(i) - + ": HTTP code " - + str(response.status_code) - + ": '" - + str(response.text) - + "'" - ) - - if response.status_code >= 500: - # A retriable error - time.sleep(1) - continue - - logging.info( - "Request headers '%s', body '%s'", - response.request.headers, - response.request.body, - ) - - raise InsertException(error) - else: - raise InsertException(error) - - def _insert_json_str_info(self, db: str, table: str, json_str: str) -> None: - self._insert_json_str_info_impl(self.url, self.auth, db, table, json_str) - - def insert_event_into( - self, db: str, table: str, event: object, safe: bool = True - ) -> None: - event_str = json.dumps(event) - try: - self._insert_json_str_info(db, table, event_str) - except InsertException as e: - logging.error( - "Exception happened during inserting data into clickhouse: %s", e - ) - if not safe: - raise - - def insert_events_into( - self, db: str, table: str, events: Iterable[object], safe: bool = True - ) -> None: - jsons = [] - for event in events: - jsons.append(json.dumps(event)) - - try: - self._insert_json_str_info(db, table, ",".join(jsons)) - except InsertException as e: - logging.error( - "Exception happened during inserting data into clickhouse: %s", e - ) - if not safe: - raise - - def _select_and_get_json_each_row(self, db: str, query: str) -> str: - params = { - "database": db, - "query": query, - "default_format": "JSONEachRow", - } - for i in range(5): - response = None - try: - response = requests.get(self.url, params=params, headers=self.auth) - response.raise_for_status() - return response.text # type: ignore - except Exception as ex: - logging.warning("Cannot fetch data with exception %s", str(ex)) - if response: - logging.warning("Reponse text %s", response.text) - time.sleep(0.1 * i) - - raise CHException("Cannot fetch data from clickhouse") - - def select_json_each_row(self, db: str, query: str) -> List[dict]: - text = self._select_and_get_json_each_row(db, query) - result = [] - for line in text.split("\n"): - if line: - result.append(json.loads(line)) - return result - - -### Runners - -RunnerDescription = namedtuple( - "RunnerDescription", ["id", "name", "tags", "offline", "busy"] -) -RunnerDescriptions = List[RunnerDescription] - - -def list_runners(access_token: str) -> RunnerDescriptions: - headers = { - "Authorization": f"token {access_token}", - "Accept": "application/vnd.github.v3+json", - } - per_page = 100 - response = requests.get( - f"https://api.github.com/orgs/ClickHouse/actions/runners?per_page={per_page}", - headers=headers, - ) - response.raise_for_status() - data = response.json() - total_runners = data["total_count"] - print("Expected total runners", total_runners) - runners = data["runners"] - - # round to 0 for 0, 1 for 1..100, but to 2 for 101..200 - total_pages = (total_runners - 1) // per_page + 1 - - print("Total pages", total_pages) - for i in range(2, total_pages + 1): - response = requests.get( - "https://api.github.com/orgs/ClickHouse/actions/runners" - f"?page={i}&per_page={per_page}", - headers=headers, - ) - response.raise_for_status() - data = response.json() - runners += data["runners"] - - print("Total runners", 
len(runners)) - result = [] - for runner in runners: - tags = [tag["name"] for tag in runner["labels"]] - desc = RunnerDescription( - id=runner["id"], - name=runner["name"], - tags=tags, - offline=runner["status"] == "offline", - busy=runner["busy"], - ) - result.append(desc) - - return result - - -def cached_value_is_valid(updated_at: float, ttl: float) -> bool: - "a common function to identify if cachable value is still valid" - if updated_at == 0: - return False - if time.time() - ttl < updated_at: - return True - return False diff --git a/tests/ci/lambda_shared_package/lambda_shared/pr.py b/tests/ci/lambda_shared_package/lambda_shared/pr.py deleted file mode 100644 index e981e28a454..00000000000 --- a/tests/ci/lambda_shared_package/lambda_shared/pr.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python - -import re -from typing import Tuple - -# Individual trusted contirbutors who are not in any trusted organization. -# Can be changed in runtime: we will append users that we learned to be in -# a trusted org, to save GitHub API calls. -TRUSTED_CONTRIBUTORS = { - e.lower() - for e in [ - "achimbab", # Kakao corp - "Algunenano", # Raúl Marín, ClickHouse, Inc - "amosbird", - "azat", # SEMRush - "bharatnc", # Many contributions. - "bobrik", # Seasoned contributor, CloudFlare - "cwurm", # ClickHouse, Inc - "den-crane", # Documentation contributor - "hagen1778", # Roman Khavronenko, seasoned contributor - "hczhcz", - "hexiaoting", # Seasoned contributor - "ildus", # adjust, ex-pgpro - "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto - "kreuzerkrieg", - "nikvas0", - "nvartolomei", # Seasoned contributor, CloudFlare - "spongedu", # Seasoned contributor - "taiyang-li", - "ucasFL", # Amos Bird's friend - "vdimir", # ClickHouse, Inc - "YiuRULE", - "zlobober", # Developer of YT - "ilejn", # Arenadata, responsible for Kerberized Kafka - "thomoco", # ClickHouse, Inc - "BoloniniD", # Seasoned contributor, HSE - "tonickkozlov", # Cloudflare - "tylerhannan", # ClickHouse, Inc - "myrrc", # Mike Kot, DoubleCloud - "thevar1able", # ClickHouse, Inc - "aalexfvk", - "MikhailBurdukov", - "tsolodov", # ClickHouse, Inc - "kitaisreal", - "k-morozov", # Konstantin Morozov, Yandex Cloud - "justindeguzman", # ClickHouse, Inc - "jrdi", # ClickHouse contributor, TinyBird - ] -} - - -class Labels: - PR_BUGFIX = "pr-bugfix" - PR_CRITICAL_BUGFIX = "pr-critical-bugfix" - CAN_BE_TESTED = "can be tested" - DO_NOT_TEST = "do not test" - MUST_BACKPORT = "pr-must-backport" - MUST_BACKPORT_CLOUD = "pr-must-backport-cloud" - JEPSEN_TEST = "jepsen-test" - SKIP_MERGEABLE_CHECK = "skip mergeable check" - PR_BACKPORT = "pr-backport" - PR_BACKPORTS_CREATED = "pr-backports-created" - PR_BACKPORTS_CREATED_CLOUD = "pr-backports-created-cloud" - PR_CHERRYPICK = "pr-cherrypick" - PR_CI = "pr-ci" - PR_FEATURE = "pr-feature" - PR_SYNCED_TO_CLOUD = "pr-synced-to-cloud" - PR_SYNC_UPSTREAM = "pr-sync-upstream" - RELEASE = "release" - RELEASE_LTS = "release-lts" - SUBMODULE_CHANGED = "submodule changed" - - # automatic backport for critical bug fixes - AUTO_BACKPORT = {"pr-critical-bugfix"} - - -# Descriptions are used in .github/PULL_REQUEST_TEMPLATE.md, keep comments there -# updated accordingly -# The following lists are append only, try to avoid editing them -# They still could be cleaned out after the decent time though. 
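
A minimal standalone sketch of the TTL check that the deleted lambda_shared caches (token, instance and runner caches) relied on, mirroring the `cached_value_is_valid` helper removed above; illustrative reconstruction only, not part of the patch:

```python
import time


def cached_value_is_valid(updated_at: float, ttl: float) -> bool:
    """True while the cached value is younger than `ttl` seconds; 0 means 'never set'."""
    if updated_at == 0:
        return False
    return time.time() - ttl < updated_at


# A value refreshed 100 seconds ago is still valid for a 300 s TTL, but not for 60 s.
last_refresh = time.time() - 100
print(cached_value_is_valid(last_refresh, ttl=300))  # True
print(cached_value_is_valid(last_refresh, ttl=60))   # False
```
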
-LABEL_CATEGORIES = { - "pr-backward-incompatible": ["Backward Incompatible Change"], - "pr-bugfix": [ - "Bug Fix", - "Bug Fix (user-visible misbehavior in an official stable release)", - "Bug Fix (user-visible misbehaviour in official stable or prestable release)", - "Bug Fix (user-visible misbehavior in official stable or prestable release)", - ], - "pr-critical-bugfix": ["Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)"], - "pr-build": [ - "Build/Testing/Packaging Improvement", - "Build Improvement", - "Build/Testing Improvement", - "Build", - "Packaging Improvement", - ], - "pr-documentation": [ - "Documentation (changelog entry is not required)", - "Documentation", - ], - "pr-feature": ["New Feature"], - "pr-improvement": ["Improvement"], - "pr-not-for-changelog": [ - "Not for changelog (changelog entry is not required)", - "Not for changelog", - ], - "pr-performance": ["Performance Improvement"], - "pr-ci": ["CI Fix or Improvement (changelog entry is not required)"], -} - -CATEGORY_TO_LABEL = { - c: lb for lb, categories in LABEL_CATEGORIES.items() for c in categories -} - - -def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]: - """The function checks the body to being properly formatted according to - .github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty, - then there is an error.""" - lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else [])) - lines = [re.sub(r"\s+", " ", line) for line in lines] - - # Check if body contains "Reverts ClickHouse/ClickHouse#36337" - if [True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line)]: - return "", LABEL_CATEGORIES["pr-not-for-changelog"][0] - - category = "" - entry = "" - description_error = "" - - i = 0 - while i < len(lines): - if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]): - i += 1 - if i >= len(lines): - break - # Can have one empty line between header and the category - # itself. Filter it out. - if not lines[i]: - i += 1 - if i >= len(lines): - break - category = re.sub(r"^[-*\s]*", "", lines[i]) - i += 1 - - # Should not have more than one category. Require empty line - # after the first found category. - if i >= len(lines): - break - if lines[i]: - second_category = re.sub(r"^[-*\s]*", "", lines[i]) - description_error = ( - "More than one changelog category specified: " - f"'{category}', '{second_category}'" - ) - return description_error, category - - elif re.match( - r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] - ): - i += 1 - # Can have one empty line between header and the entry itself. - # Filter it out. - if i < len(lines) and not lines[i]: - i += 1 - # All following lines until empty one are the changelog entry. - entry_lines = [] - while i < len(lines) and lines[i]: - entry_lines.append(lines[i]) - i += 1 - entry = " ".join(entry_lines) - # Don't accept changelog entries like '...'. - entry = re.sub(r"[#>*_.\- ]", "", entry) - # Don't accept changelog entries like 'Close #12345'. - entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry) - else: - i += 1 - - if not category: - description_error = "Changelog category is empty" - # Filter out the PR categories that are not for changelog. 
- elif "(changelog entry is not required)" in category: - pass # to not check the rest of the conditions - elif category not in CATEGORY_TO_LABEL: - description_error, category = f"Category '{category}' is not valid", "" - elif not entry: - description_error = f"Changelog entry required for category '{category}'" - - return description_error, category diff --git a/tests/ci/lambda_shared_package/lambda_shared/token.py b/tests/ci/lambda_shared_package/lambda_shared/token.py deleted file mode 100644 index 3fb8f10c0e2..00000000000 --- a/tests/ci/lambda_shared_package/lambda_shared/token.py +++ /dev/null @@ -1,95 +0,0 @@ -"""Module to get the token for GitHub""" - -from dataclasses import dataclass -import json -import time -from typing import Tuple - -import boto3 # type: ignore -import jwt -import requests - -from . import cached_value_is_valid - - -def get_key_and_app_from_aws() -> Tuple[str, int]: - secret_name = "clickhouse_github_secret_key" - session = boto3.session.Session() - client = session.client( - service_name="secretsmanager", - ) - get_secret_value_response = client.get_secret_value(SecretId=secret_name) - data = json.loads(get_secret_value_response["SecretString"]) - return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) - - -def get_installation_id(jwt_token: str) -> int: - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.get("https://api.github.com/app/installations", headers=headers) - response.raise_for_status() - data = response.json() - for installation in data: - if installation["account"]["login"] == "ClickHouse": - installation_id = installation["id"] - - return installation_id # type: ignore - - -def get_access_token_by_jwt(jwt_token: str, installation_id: int) -> str: - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.post( - f"https://api.github.com/app/installations/{installation_id}/access_tokens", - headers=headers, - ) - response.raise_for_status() - data = response.json() - return data["token"] # type: ignore - - -def get_token_from_aws() -> str: - private_key, app_id = get_key_and_app_from_aws() - return get_access_token_by_key_app(private_key, app_id) - - -def get_access_token_by_key_app(private_key: str, app_id: int) -> str: - payload = { - "iat": int(time.time()) - 60, - "exp": int(time.time()) + (10 * 60), - "iss": app_id, - } - - # FIXME: apparently should be switched to this so that mypy is happy - # jwt_instance = JWT() - # encoded_jwt = jwt_instance.encode(payload, private_key, algorithm="RS256") - encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") # type: ignore - installation_id = get_installation_id(encoded_jwt) - return get_access_token_by_jwt(encoded_jwt, installation_id) - - -@dataclass -class CachedToken: - time: float - value: str - updating: bool = False - - -_cached_token = CachedToken(0, "") - - -def get_cached_access_token() -> str: - if time.time() - 550 < _cached_token.time or _cached_token.updating: - return _cached_token.value - # Indicate that the value is updating now, so the cached value can be - # used. 
The first setting and close-to-ttl are not counted as update - _cached_token.updating = cached_value_is_valid(_cached_token.time, 590) - private_key, app_id = get_key_and_app_from_aws() - _cached_token.time = time.time() - _cached_token.value = get_access_token_by_key_app(private_key, app_id) - _cached_token.updating = False - return _cached_token.value diff --git a/tests/ci/lambda_shared_package/pyproject.toml b/tests/ci/lambda_shared_package/pyproject.toml deleted file mode 100644 index dff36b89fbb..00000000000 --- a/tests/ci/lambda_shared_package/pyproject.toml +++ /dev/null @@ -1,24 +0,0 @@ -[build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" - -[project] -name = "lambda_shared" -version = "0.0.1" -dependencies = [ - "requests", - "urllib3 < 2" -] - -[project.optional-dependencies] -token = [ - "PyJWT", - "cryptography", -] -dev = [ - "boto3", - "lambda_shared[token]", -] - -[tool.distutils.bdist_wheel] -universal = true diff --git a/tests/ci/lambda_shared_package/setup.cfg b/tests/ci/lambda_shared_package/setup.cfg deleted file mode 100644 index 744280ae41b..00000000000 --- a/tests/ci/lambda_shared_package/setup.cfg +++ /dev/null @@ -1,8 +0,0 @@ -### This file exists for clear builds in docker ### -# without it the `build` directory wouldn't be # -# updated on the fly and will require manual clean # -[build] -build_base = /tmp/lambda_shared - -[egg_info] -egg_base = /tmp/ diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py index 1f5936c3fec..d9e33229932 100644 --- a/tests/ci/libfuzzer_test_check.py +++ b/tests/ci/libfuzzer_test_check.py @@ -74,7 +74,8 @@ def get_run_command( f"--volume={repo_path}/tests:/usr/share/clickhouse-test " f"--volume={result_path}:/test_output " "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls - f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" + f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image} " + "python3 ./utils/runner.py" ) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 59749abb4fa..13c7537a84b 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -27,7 +27,6 @@ from report import SUCCESS, FAILURE from env_helper import GITHUB_UPSTREAM_REPOSITORY, GITHUB_REPOSITORY from synchronizer_utils import SYNC_BRANCH_PREFIX from ci_config import CI -from ci_utils import Utils # The team name for accepted approvals TEAM_NAME = getenv("GITHUB_TEAM_NAME", "core") @@ -249,63 +248,25 @@ def main(): repo = gh.get_repo(args.repo) if args.set_ci_status: + # set Mergeable check status and exit assert args.wf_status in (FAILURE, SUCCESS) - # set mergeable check status and exit commit = get_commit(gh, args.pr_info.sha) statuses = get_commit_filtered_statuses(commit) - max_failed_tests_per_job = 0 - job_name_with_max_failures = None - total_failed_tests = 0 - failed_to_get_info = False has_failed_statuses = False + has_native_failed_status = False for status in statuses: - if not CI.is_required(status.context): - continue - if status.state == FAILURE: + print(f"Check status [{status.context}], [{status.state}]") + if CI.is_required(status.context) and status.state != SUCCESS: + print(f"WARNING: Failed status [{status.context}], [{status.state}]") has_failed_statuses = True - failed_cnt = Utils.get_failed_tests_number(status.description) - if failed_cnt is None: - failed_to_get_info = True - else: - if failed_cnt > max_failed_tests_per_job: - job_name_with_max_failures = status.context - max_failed_tests_per_job = failed_cnt - 
total_failed_tests += failed_cnt - elif status.state != SUCCESS and status.context not in ( - CI.StatusNames.SYNC, - CI.StatusNames.PR_CHECK, - ): - # do not block CI on failures in (CI.StatusNames.SYNC, CI.StatusNames.PR_CHECK) - has_failed_statuses = True - print( - f"Unexpected status for [{status.context}]: [{status.state}] - block further testing" - ) - failed_to_get_info = True - - can_continue = True - if total_failed_tests > CI.MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI: - print( - f"Required check has [{total_failed_tests}] failed - block further testing" - ) - can_continue = False - if max_failed_tests_per_job > CI.MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI: - print( - f"Job [{job_name_with_max_failures}] has [{max_failed_tests_per_job}] failures - block further testing" - ) - can_continue = False - if failed_to_get_info: - print("Unexpected commit status state - block further testing") - can_continue = False - if args.wf_status != SUCCESS and not has_failed_statuses: - # workflow failed but reason is unknown as no failed statuses present - can_continue = False - print( - "WARNING: Either the runner is faulty or the operating status is unknown. The first is self-healing, the second requires investigation." - ) + if status.context != CI.StatusNames.SYNC: + has_native_failed_status = True if args.wf_status == SUCCESS or has_failed_statuses: - # do not set mergeable check status if args.wf_status == failure, apparently it has died runners and is to be restarted + # set Mergeable check if workflow is successful (green) + # or if we have GH statuses with failures (red) + # to avoid false-green on a died runner state = trigger_mergeable_check( commit, statuses, @@ -322,10 +283,10 @@ def main(): print( "Workflow failed but no failed statuses found (died runner?) 
- cannot set Mergeable Check status" ) - - if not can_continue: + if args.wf_status == SUCCESS and not has_native_failed_status: + sys.exit(0) + else: sys.exit(1) - sys.exit(0) # An ugly and not nice fix to patch the wrong organization URL, # see https://github.com/PyGithub/PyGithub/issues/2395#issuecomment-1378629710 diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index a411fc4e8f6..59806a2a8fa 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -15,7 +15,7 @@ from env_helper import ( GITHUB_SERVER_URL, GITHUB_UPSTREAM_REPOSITORY, ) -from lambda_shared_package.lambda_shared.pr import Labels +from ci_config import Labels from get_robot_token import get_best_robot_token from github_helper import GitHub @@ -296,13 +296,16 @@ class PRInfo: else: if "schedule" in github_event: self.event_type = EventType.SCHEDULE - else: + elif "inputs" in github_event: # assume this is a dispatch self.event_type = EventType.DISPATCH - logging.warning( - "event.json does not match pull_request or push:\n%s", - json.dumps(github_event, sort_keys=True, indent=4), - ) + print("PR Info:") + print(self) + else: + logging.warning( + "event.json does not match pull_request or push:\n%s", + json.dumps(github_event, sort_keys=True, indent=4), + ) self.sha = os.getenv( "GITHUB_SHA", "0000000000000000000000000000000000000000" ) diff --git a/tests/ci/release.py b/tests/ci/release.py index 7ab1149310f..2de20d00a00 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -25,7 +25,7 @@ from contextlib import contextmanager from typing import Any, Final, Iterator, List, Optional, Tuple from git_helper import Git, commit, release_branch -from lambda_shared_package.lambda_shared.pr import Labels +from ci_config import Labels from report import SUCCESS from version_helper import ( FILE_WITH_VERSION_PATH, diff --git a/tests/ci/report.py b/tests/ci/report.py index 4be7b438f4f..77043dfc8be 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -247,6 +247,9 @@ BASE_HEADERS = ["Test name", "Test status"] # should not be in TEMP directory or any directory that may be cleaned during the job execution JOB_REPORT_FILE = Path(GITHUB_WORKSPACE) / "job_report.json" +JOB_STARTED_TEST_NAME = "STARTED" +JOB_FINISHED_TEST_NAME = "COMPLETED" + @dataclass class TestResult: @@ -304,14 +307,19 @@ class JobReport: exit_code: int = -1 @staticmethod - def create_pre_report() -> "JobReport": + def get_start_time_from_current(): + return datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") + + @classmethod + def create_pre_report(cls, status: str, job_skipped: bool) -> "JobReport": return JobReport( - status=ERROR, + status=status, description="", test_results=[], - start_time=datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"), + start_time=cls.get_start_time_from_current(), duration=0.0, additional_files=[], + job_skipped=job_skipped, pre_report=True, ) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 00942352dde..0ad01e3accd 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -15,26 +15,22 @@ from commit_status_helper import ( ) from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL from get_robot_token import get_best_robot_token -from lambda_shared_package.lambda_shared.pr import ( - CATEGORY_TO_LABEL, - TRUSTED_CONTRIBUTORS, - Labels, - check_pr_description, -) +from ci_config import CI +from ci_utils import Utils from pr_info import PRInfo from report import FAILURE, PENDING, SUCCESS, StatusType -from ci_config import CI + TRUSTED_ORG_IDS = { 54801242, # clickhouse } 
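
The rewritten `--set-ci-status` branch in merge_pr.py reduces to one decision over the workflow status and the required commit statuses. A minimal sketch of that gating, with hypothetical stand-ins for the report constants and `CI.StatusNames.SYNC`:

```python
from typing import Callable, Iterable, NamedTuple

SUCCESS, FAILURE = "success", "failure"  # stand-ins for report.SUCCESS / report.FAILURE
SYNC_CONTEXT = "A Sync"                  # stand-in for CI.StatusNames.SYNC (assumed value)


class Status(NamedTuple):
    context: str
    state: str


def ci_exit_code(
    wf_status: str, statuses: Iterable[Status], is_required: Callable[[str], bool]
) -> int:
    """Return the process exit code: 0 keeps the PR mergeable, 1 blocks it."""
    has_failed_statuses = False
    has_native_failed_status = False
    for status in statuses:
        if is_required(status.context) and status.state != SUCCESS:
            has_failed_statuses = True
            if status.context != SYNC_CONTEXT:
                has_native_failed_status = True
    # The Mergeable check itself is only (re)set when the workflow is green or when
    # there are real red statuses, to avoid a false green from a died runner.
    should_set_mergeable = wf_status == SUCCESS or has_failed_statuses
    _ = should_set_mergeable  # trigger_mergeable_check(...) would be called here
    return 0 if wf_status == SUCCESS and not has_native_failed_status else 1


# A green workflow where only the sync status is red still exits 0.
statuses = [Status(SYNC_CONTEXT, FAILURE), Status("Stateless tests (release)", SUCCESS)]
print(ci_exit_code(SUCCESS, statuses, is_required=lambda _ctx: True))  # 0
```
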
-OK_SKIP_LABELS = {Labels.RELEASE, Labels.PR_BACKPORT, Labels.PR_CHERRYPICK} +OK_SKIP_LABELS = {CI.Labels.RELEASE, CI.Labels.PR_BACKPORT, CI.Labels.PR_CHERRYPICK} PR_CHECK = "PR Check" def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): - if pr_user_login.lower() in TRUSTED_CONTRIBUTORS: + if pr_user_login.lower() in CI.TRUSTED_CONTRIBUTORS: logging.info("User '%s' is trusted", pr_user_login) return True @@ -63,13 +59,13 @@ def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str]: if OK_SKIP_LABELS.intersection(pr_info.labels): return True, "Don't try new checks for release/backports/cherry-picks" - if Labels.CAN_BE_TESTED not in pr_info.labels and not pr_is_by_trusted_user( + if CI.Labels.CAN_BE_TESTED not in pr_info.labels and not pr_is_by_trusted_user( pr_info.user_login, pr_info.user_orgs ): logging.info( "PRs by untrusted users need the '%s' label - " "please contact a member of the core team", - Labels.CAN_BE_TESTED, + CI.Labels.CAN_BE_TESTED, ) return False, "Needs 'can be tested' label" @@ -96,30 +92,32 @@ def main(): commit = get_commit(gh, pr_info.sha) status = SUCCESS # type: StatusType - description_error, category = check_pr_description(pr_info.body, GITHUB_REPOSITORY) + description_error, category = Utils.check_pr_description( + pr_info.body, GITHUB_REPOSITORY + ) pr_labels_to_add = [] pr_labels_to_remove = [] if ( - category in CATEGORY_TO_LABEL - and CATEGORY_TO_LABEL[category] not in pr_info.labels + category in CI.CATEGORY_TO_LABEL + and CI.CATEGORY_TO_LABEL[category] not in pr_info.labels ): - pr_labels_to_add.append(CATEGORY_TO_LABEL[category]) + pr_labels_to_add.append(CI.CATEGORY_TO_LABEL[category]) for label in pr_info.labels: if ( - label in CATEGORY_TO_LABEL.values() - and category in CATEGORY_TO_LABEL - and label != CATEGORY_TO_LABEL[category] + label in CI.CATEGORY_TO_LABEL.values() + and category in CI.CATEGORY_TO_LABEL + and label != CI.CATEGORY_TO_LABEL[category] ): pr_labels_to_remove.append(label) if pr_info.has_changes_in_submodules(): - pr_labels_to_add.append(Labels.SUBMODULE_CHANGED) - elif Labels.SUBMODULE_CHANGED in pr_info.labels: - pr_labels_to_remove.append(Labels.SUBMODULE_CHANGED) + pr_labels_to_add.append(CI.Labels.SUBMODULE_CHANGED) + elif CI.Labels.SUBMODULE_CHANGED in pr_info.labels: + pr_labels_to_remove.append(CI.Labels.SUBMODULE_CHANGED) - if any(label in Labels.AUTO_BACKPORT for label in pr_labels_to_add): - backport_labels = [Labels.MUST_BACKPORT, Labels.MUST_BACKPORT_CLOUD] + if any(label in CI.Labels.AUTO_BACKPORT for label in pr_labels_to_add): + backport_labels = [CI.Labels.MUST_BACKPORT, CI.Labels.MUST_BACKPORT_CLOUD] pr_labels_to_add += [ label for label in backport_labels if label not in pr_info.labels ] @@ -164,15 +162,15 @@ def main(): # 2. Then we check if the documentation is not created to fail the Mergeable check if ( - Labels.PR_FEATURE in pr_info.labels + CI.Labels.PR_FEATURE in pr_info.labels and not pr_info.has_changes_in_documentation() ): print( - f"::error ::The '{Labels.PR_FEATURE}' in the labels, " + f"::error ::The '{CI.Labels.PR_FEATURE}' in the labels, " "but there's no changed documentation" ) status = FAILURE - description = f"expect adding docs for {Labels.PR_FEATURE}" + description = f"expect adding docs for {CI.Labels.PR_FEATURE}" # 3. But we allow the workflow to continue # 4. 
And post only a single commit status on a failure diff --git a/tests/ci/runner_token_rotation_lambda/app.py b/tests/ci/runner_token_rotation_lambda/app.py deleted file mode 100644 index 445704648a9..00000000000 --- a/tests/ci/runner_token_rotation_lambda/app.py +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import sys - -import boto3 # type: ignore -import requests -from lambda_shared.token import get_access_token_by_key_app, get_cached_access_token - - -def get_runner_registration_token(access_token): - headers = { - "Authorization": f"token {access_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.post( - "https://api.github.com/orgs/ClickHouse/actions/runners/registration-token", - headers=headers, - timeout=30, - ) - response.raise_for_status() - data = response.json() - return data["token"] - - -def main(access_token, push_to_ssm, ssm_parameter_name): - runner_registration_token = get_runner_registration_token(access_token) - - if push_to_ssm: - print("Trying to put params into ssm manager") - client = boto3.client("ssm") - client.put_parameter( - Name=ssm_parameter_name, - Value=runner_registration_token, - Type="SecureString", - Overwrite=True, - ) - else: - print( - "Not push token to AWS Parameter Store, just print:", - runner_registration_token, - ) - - -def handler(event, context): - _, _ = event, context - main(get_cached_access_token(), True, "github_runner_registration_token") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Get new token from github to add runners" - ) - parser.add_argument( - "-p", "--private-key-path", help="Path to file with private key" - ) - parser.add_argument("-k", "--private-key", help="Private key") - parser.add_argument( - "-a", "--app-id", type=int, help="GitHub application ID", required=True - ) - parser.add_argument( - "--push-to-ssm", - action="store_true", - help="Store received token in parameter store", - ) - parser.add_argument( - "--ssm-parameter-name", - default="github_runner_registration_token", - help="AWS paramater store parameter name", - ) - - args = parser.parse_args() - - if not args.private_key_path and not args.private_key: - print( - "Either --private-key-path or --private-key must be specified", - file=sys.stderr, - ) - - if args.private_key_path and args.private_key: - print( - "Either --private-key-path or --private-key must be specified", - file=sys.stderr, - ) - - if args.private_key: - private_key = args.private_key - else: - with open(args.private_key_path, "r", encoding="utf-8") as key_file: - private_key = key_file.read() - - token = get_access_token_by_key_app(private_key, args.app_id) - main(token, args.push_to_ssm, args.ssm_parameter_name) diff --git a/tests/ci/runner_token_rotation_lambda/build_and_deploy_archive.sh b/tests/ci/runner_token_rotation_lambda/build_and_deploy_archive.sh deleted file mode 120000 index 96ba3fa024e..00000000000 --- a/tests/ci/runner_token_rotation_lambda/build_and_deploy_archive.sh +++ /dev/null @@ -1 +0,0 @@ -../team_keys_lambda/build_and_deploy_archive.sh \ No newline at end of file diff --git a/tests/ci/runner_token_rotation_lambda/lambda_shared b/tests/ci/runner_token_rotation_lambda/lambda_shared deleted file mode 120000 index ba86e090f6c..00000000000 --- a/tests/ci/runner_token_rotation_lambda/lambda_shared +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/runner_token_rotation_lambda/requirements.txt 
b/tests/ci/runner_token_rotation_lambda/requirements.txt deleted file mode 100644 index 4cb3fba0f7b..00000000000 --- a/tests/ci/runner_token_rotation_lambda/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package[token] diff --git a/tests/ci/slack_bot_ci_lambda/app.py b/tests/ci/slack_bot_ci_lambda/app.py deleted file mode 100755 index 94b71724b1c..00000000000 --- a/tests/ci/slack_bot_ci_lambda/app.py +++ /dev/null @@ -1,323 +0,0 @@ -#!/usr/bin/env python3 - -""" -A trivial stateless slack bot that notifies about new broken tests in ClickHouse CI. -It checks what happened to our CI during the last check_period hours (1 hour) and - notifies us in slack if necessary. -This script should be executed once each check_period hours (1 hour). -It will post duplicate messages if you run it more often; it will lose some messages - if you run it less often. - -You can run it locally with no arguments, it will work in a dry-run mode. - Or you can set your own SLACK_URL_DEFAULT. -Feel free to add more checks, more details to messages, or better heuristics. - -It's deployed to slack-bot-ci-lambda in CI/CD account - -See also: https://aretestsgreenyet.com/ -""" - -import base64 -import json -import os -import random - -import requests - -DRY_RUN_MARK = "" - -MAX_FAILURES_DEFAULT = 30 -SLACK_URL_DEFAULT = DRY_RUN_MARK - -FLAKY_ALERT_PROBABILITY = 0.50 -REPORT_NO_FAILURES_PROBABILITY = 0.99 - -MAX_TESTS_TO_REPORT = 4 - -# Slack has a stupid limitation on message size, it splits long messages into multiple, -# ones breaking formatting -MESSAGE_LENGTH_LIMIT = 4000 - -# Find tests that failed in master during the last check_period * 24 hours, -# but did not fail during the last 2 weeks. Assuming these tests were broken recently. -# Counts number of failures in check_period and check_period * 24 time windows -# to distinguish rare flaky tests from completely broken tests -NEW_BROKEN_TESTS_QUERY = """ -WITH - 1 AS check_period, - check_period * 24 AS extended_check_period, - now() as now -SELECT - test_name, - any(report_url), - countIf((check_start_time + check_duration_ms / 1000) < now - INTERVAL check_period HOUR) AS count_prev_periods, - countIf((check_start_time + check_duration_ms / 1000) >= now - INTERVAL check_period HOUR) AS count -FROM checks -WHERE 1 - AND check_start_time BETWEEN now - INTERVAL 1 WEEK AND now - AND (check_start_time + check_duration_ms / 1000) >= now - INTERVAL extended_check_period HOUR - AND pull_request_number = 0 - AND test_status LIKE 'F%' - AND check_status != 'success' - AND test_name NOT IN ( - SELECT test_name FROM checks WHERE 1 - AND check_start_time >= now - INTERVAL 1 MONTH - AND (check_start_time + check_duration_ms / 1000) BETWEEN now - INTERVAL 2 WEEK AND now - INTERVAL extended_check_period HOUR - AND pull_request_number = 0 - AND check_status != 'success' - AND test_status LIKE 'F%') - AND test_context_raw NOT LIKE '%CannotSendRequest%' and test_context_raw NOT LIKE '%Server does not respond to health check%' -GROUP BY test_name -ORDER BY (count_prev_periods + count) DESC -""" - -# Returns total number of failed checks during the last 24 hours -# and previous value of that metric (check_period hours ago) -COUNT_FAILURES_QUERY = """ -WITH - 1 AS check_period, - '%' AS check_name_pattern, - now() as now -SELECT - countIf((check_start_time + check_duration_ms / 1000) >= now - INTERVAL 24 HOUR) AS new_val, - countIf((check_start_time + check_duration_ms / 1000) <= now - INTERVAL check_period HOUR) AS prev_val -FROM checks -WHERE 1 - AND check_start_time 
>= now - INTERVAL 1 WEEK - AND (check_start_time + check_duration_ms / 1000) >= now - INTERVAL 24 + check_period HOUR - AND pull_request_number = 0 - AND test_status LIKE 'F%' - AND check_status != 'success' - AND check_name ILIKE check_name_pattern -""" - -# Returns percentage of failed checks (once per day, at noon) -FAILED_CHECKS_PERCENTAGE_QUERY = """ -SELECT if(toHour(now('Europe/Amsterdam')) = 12, v, 0) -FROM -( - SELECT - countDistinctIf((commit_sha, check_name), (test_status LIKE 'F%') AND (check_status != 'success')) - / countDistinct((commit_sha, check_name)) AS v - FROM checks - WHERE 1 - AND (pull_request_number = 0) - AND (test_status != 'SKIPPED') - AND (check_start_time > (now() - toIntervalDay(1))) -) -""" - -# It shows all recent failures of the specified test (helps to find when it started) -ALL_RECENT_FAILURES_QUERY = """ -WITH - '{}' AS name_substr, - 90 AS interval_days, - ('Stateless tests (asan)', 'Stateless tests (address)', 'Stateless tests (address, actions)', 'Integration tests (asan) [1/3]', 'Stateless tests (tsan) [1/3]') AS backport_and_release_specific_checks -SELECT - toStartOfDay(check_start_time) AS d, - count(), - groupUniqArray(pull_request_number) AS prs, - any(report_url) -FROM checks -WHERE ((now() - toIntervalDay(interval_days)) <= check_start_time) AND (pull_request_number NOT IN ( - SELECT pull_request_number AS prn - FROM checks - WHERE (prn != 0) AND ((now() - toIntervalDay(interval_days)) <= check_start_time) AND (check_name IN (backport_and_release_specific_checks)) -)) AND (position(test_name, name_substr) > 0) AND (test_status IN ('FAIL', 'ERROR', 'FLAKY')) -GROUP BY d -ORDER BY d DESC -""" - -SLACK_MESSAGE_JSON = {"type": "mrkdwn", "text": None} - - -def get_play_url(query): - return ( - "https://play.clickhouse.com/play?user=play#" - + base64.b64encode(query.encode()).decode() - ) - - -def run_clickhouse_query(query): - url = "https://play.clickhouse.com/?user=play&query=" + requests.compat.quote(query) - res = requests.get(url, timeout=30) - if res.status_code != 200: - print("Failed to execute query: ", res.status_code, res.content) - res.raise_for_status() - - lines = res.text.strip().splitlines() - return [x.split("\t") for x in lines] - - -def split_broken_and_flaky_tests(failed_tests): - if not failed_tests: - return None - - broken_tests = [] - flaky_tests = [] - for name, report, count_prev_str, count_str in failed_tests: - count_prev, count = int(count_prev_str), int(count_str) - if (count_prev < 2 <= count) or (count_prev == count == 1): - # It failed 2 times or more within extended time window, it's definitely broken. - # 2 <= count means that it was not reported as broken on previous runs - broken_tests.append([name, report]) - elif 0 < count and count_prev == 0: - # It failed only once, can be a rare flaky test - flaky_tests.append([name, report]) - - return broken_tests, flaky_tests - - -def format_failed_tests_list(failed_tests, failure_type): - if len(failed_tests) == 1: - res = f"There is a new {failure_type} test:\n" - else: - res = f"There are {len(failed_tests)} new {failure_type} tests:\n" - - for name, report in failed_tests[:MAX_TESTS_TO_REPORT]: - cidb_url = get_play_url(ALL_RECENT_FAILURES_QUERY.format(name)) - res += f"- *{name}* - <{report}|Report> - <{cidb_url}|CI DB> \n" - - if MAX_TESTS_TO_REPORT < len(failed_tests): - res += ( - f"- and {len(failed_tests) - MAX_TESTS_TO_REPORT} other " - "tests... 
:this-is-fine-fire:" - ) - - return res - - -def get_new_broken_tests_message(failed_tests): - if not failed_tests: - return None - - broken_tests, flaky_tests = split_broken_and_flaky_tests(failed_tests) - if len(broken_tests) == 0 and len(flaky_tests) == 0: - return None - - msg = "" - if len(broken_tests) > 0: - msg += format_failed_tests_list(broken_tests, "*BROKEN*") - elif random.random() > FLAKY_ALERT_PROBABILITY: - looks_like_fuzzer = [x[0].count(" ") > 2 for x in flaky_tests] - if not any(looks_like_fuzzer): - print("Will not report flaky tests to avoid noise: ", flaky_tests) - return None - - if len(flaky_tests) > 0: - if len(msg) > 0: - msg += "\n" - msg += format_failed_tests_list(flaky_tests, "flaky") - - return msg - - -def get_too_many_failures_message_impl(failures_count): - MAX_FAILURES = int(os.environ.get("MAX_FAILURES", MAX_FAILURES_DEFAULT)) - curr_failures = int(failures_count[0][0]) - prev_failures = int(failures_count[0][1]) - if curr_failures == 0 and prev_failures != 0: - if random.random() < REPORT_NO_FAILURES_PROBABILITY: - return None - return "Wow, there are *no failures* at all... 0_o" - return_none = ( - curr_failures < MAX_FAILURES - or curr_failures < prev_failures - or (curr_failures - prev_failures) / prev_failures < 0.2 - ) - if return_none: - return None - if prev_failures < MAX_FAILURES: - return f":alert: *CI is broken: there are {curr_failures} failures during the last 24 hours*" - return "CI is broken and it's getting worse: there are {curr_failures} failures during the last 24 hours" - - -def get_too_many_failures_message(failures_count): - msg = get_too_many_failures_message_impl(failures_count) - if msg: - msg += "\nSee https://aretestsgreenyet.com/" - return msg - - -def get_failed_checks_percentage_message(percentage): - p = float(percentage[0][0]) * 100 - - # Always report more than 1% of failed checks - # For <= 1%: higher percentage of failures == higher probability - if p <= random.random(): - return None - - msg = ":alert: " if p > 1 else "Only " if p < 0.5 else "" - msg += f"*{p:.2f}%* of all checks in master have failed yesterday" - return msg - - -def split_slack_message(long_message): - lines = long_message.split("\n") - messages = [] - curr_msg = "" - for line in lines: - if len(curr_msg) + len(line) < MESSAGE_LENGTH_LIMIT: - curr_msg += "\n" - curr_msg += line - else: - messages.append(curr_msg) - curr_msg = line - messages.append(curr_msg) - return messages - - -def send_to_slack_impl(message): - SLACK_URL = os.environ.get("SLACK_URL", SLACK_URL_DEFAULT) - if SLACK_URL == DRY_RUN_MARK: - return - - payload = SLACK_MESSAGE_JSON.copy() - payload["text"] = message - res = requests.post(SLACK_URL, json.dumps(payload), timeout=30) - if res.status_code != 200: - print("Failed to send a message to Slack: ", res.status_code, res.content) - res.raise_for_status() - - -def send_to_slack(message): - messages = split_slack_message(message) - for msg in messages: - send_to_slack_impl(msg) - - -def query_and_alert_if_needed(query, get_message_func): - query_res = run_clickhouse_query(query) - print("Got result {} for query {}", query_res, query) - msg = get_message_func(query_res) - if msg is None: - return - - msg += f"\nCI DB query: <{get_play_url(query)}|link>" - print("Sending message to slack:", msg) - send_to_slack(msg) - - -def check_and_alert(): - query_and_alert_if_needed(NEW_BROKEN_TESTS_QUERY, get_new_broken_tests_message) - query_and_alert_if_needed(COUNT_FAILURES_QUERY, get_too_many_failures_message) - query_and_alert_if_needed( - 
FAILED_CHECKS_PERCENTAGE_QUERY, get_failed_checks_percentage_message - ) - - -def handler(event, context): - _, _ = event, context - try: - check_and_alert() - return {"statusCode": 200, "body": "OK"} - except Exception as e: - send_to_slack( - "I failed, please help me " - f"(see ClickHouse/ClickHouse/tests/ci/slack_bot_ci_lambda/app.py): {e}" - ) - return {"statusCode": 200, "body": "FAIL"} - - -if __name__ == "__main__": - check_and_alert() diff --git a/tests/ci/slack_bot_ci_lambda/build_and_deploy_archive.sh b/tests/ci/slack_bot_ci_lambda/build_and_deploy_archive.sh deleted file mode 120000 index 96ba3fa024e..00000000000 --- a/tests/ci/slack_bot_ci_lambda/build_and_deploy_archive.sh +++ /dev/null @@ -1 +0,0 @@ -../team_keys_lambda/build_and_deploy_archive.sh \ No newline at end of file diff --git a/tests/ci/slack_bot_ci_lambda/requirements.txt b/tests/ci/slack_bot_ci_lambda/requirements.txt deleted file mode 100644 index 098e04a9798..00000000000 --- a/tests/ci/slack_bot_ci_lambda/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 486bfc25e22..85da601e379 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -3,6 +3,7 @@ import csv import logging import os +import re import subprocess import sys from pathlib import Path @@ -19,6 +20,18 @@ from stopwatch import Stopwatch from tee_popen import TeePopen +class SensitiveFormatter(logging.Formatter): + @staticmethod + def _filter(s): + return re.sub( + r"(.*)(AZURE_CONNECTION_STRING.*\')(.*)", r"\1AZURE_CONNECTION_STRING\3", s + ) + + def format(self, record): + original = logging.Formatter.format(self, record) + return self._filter(original) + + def get_additional_envs(check_name: str) -> List[str]: result = [] azure_connection_string = get_parameter_from_ssm("azure_connection_string") @@ -117,6 +130,9 @@ def process_results( def run_stress_test(docker_image_name: str) -> None: logging.basicConfig(level=logging.INFO) + for handler in logging.root.handlers: + # pylint: disable=protected-access + handler.setFormatter(SensitiveFormatter(handler.formatter._fmt)) # type: ignore stopwatch = Stopwatch() temp_path = Path(TEMP_PATH) diff --git a/tests/ci/team_keys_lambda/app.py b/tests/ci/team_keys_lambda/app.py deleted file mode 100644 index c056808a9b4..00000000000 --- a/tests/ci/team_keys_lambda/app.py +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import json -from datetime import datetime -from queue import Queue -from threading import Thread - -import boto3 # type: ignore -import requests - - -class Keys(set): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.updated_at = 0.0 - - def update_now(self): - self.updated_at = datetime.now().timestamp() - - -keys = Keys() - - -class Worker(Thread): - def __init__(self, request_queue): - Thread.__init__(self) - self.queue = request_queue - self.results = set() - - def run(self): - while True: - m = self.queue.get() - if m == "": - break - response = requests.get(f"https://github.com/{m}.keys", timeout=30) - self.results.add(f"# {m}\n{response.text}\n") - self.queue.task_done() - - -def get_org_team_members(token: str, org: str, team_slug: str) -> set: - headers = { - "Authorization": f"token {token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.get( - f"https://api.github.com/orgs/{org}/teams/{team_slug}/members", - headers=headers, - timeout=30, - ) - response.raise_for_status() - data = response.json() - 
return set(m["login"] for m in data) - - -def get_cached_members_keys(members: set) -> Keys: - if (datetime.now().timestamp() - 3600) <= keys.updated_at: - return keys - - q = Queue() # type: Queue - workers = [] - for m in members: - q.put(m) - # Create workers and add to the queue - worker = Worker(q) - worker.start() - workers.append(worker) - - # Workers keep working till they receive an empty string - for _ in workers: - q.put("") - - # Join workers to wait till they finished - for worker in workers: - worker.join() - - keys.clear() - for worker in workers: - keys.update(worker.results) - keys.update_now() - return keys - - -def get_token_from_aws() -> str: - # We need a separate token, since the clickhouse-ci app does not have - # access to the organization members' endpoint - secret_name = "clickhouse_robot_token" - session = boto3.session.Session() - client = session.client( - service_name="secretsmanager", - ) - get_secret_value_response = client.get_secret_value(SecretId=secret_name) - data = json.loads(get_secret_value_response["SecretString"]) - return data["clickhouse_robot_token"] # type: ignore - - -def main(token: str, org: str, team_slug: str) -> str: - members = get_org_team_members(token, org, team_slug) - keys = get_cached_members_keys(members) - - return "".join(sorted(keys)) - - -def handler(event, context): - _ = context - _ = event - if keys.updated_at < (datetime.now().timestamp() - 3600): - token = get_token_from_aws() - body = main(token, "ClickHouse", "core") - else: - body = "".join(sorted(keys)) - - result = { - "statusCode": 200, - "headers": { - "Content-Type": "text/html", - }, - "body": body, - } - return result - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Get the public SSH keys for members of given org and team" - ) - parser.add_argument("--token", required=True, help="Github PAT") - parser.add_argument( - "--organization", help="GitHub organization name", default="ClickHouse" - ) - parser.add_argument("--team", help="GitHub team name", default="core") - - args = parser.parse_args() - output = main(args.token, args.organization, args.team) - - print(f"# Just showing off the keys:\n{output}") diff --git a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh deleted file mode 100644 index b72bce4a677..00000000000 --- a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env bash -set -xeo pipefail - -WORKDIR=$(dirname "$0") -WORKDIR=$(readlink -f "${WORKDIR}") -DIR_NAME=$(basename "$WORKDIR") -cd "$WORKDIR" - -# Do not deploy the lambda to AWS -DRY_RUN=${DRY_RUN:-} -# Python runtime to install dependencies -PY_VERSION=${PY_VERSION:-3.10} -PY_EXEC="python${PY_VERSION}" -# Image to build the lambda zip package -DOCKER_IMAGE="public.ecr.aws/lambda/python:${PY_VERSION}" -# Rename the_lambda_name directory to the-lambda-name lambda in AWS -LAMBDA_NAME=${DIR_NAME//_/-} -# The name of directory with lambda code -PACKAGE=lambda-package - -# Do not rebuild and deploy the archive if it's newer than sources -if [ -e "$PACKAGE.zip" ] && [ -z "$FORCE" ]; then - REBUILD="" - for src in app.py build_and_deploy_archive.sh requirements.txt lambda_shared/*; do - if [ "$src" -nt "$PACKAGE.zip" ]; then - REBUILD=1 - fi - done - [ -n "$REBUILD" ] || exit 0 -fi - -docker_cmd=( - docker run -i --net=host --rm --user="${UID}" -e HOME=/tmp --entrypoint=/bin/bash - --volume="${WORKDIR}/..:/ci" --workdir="/ci/${DIR_NAME}" "${DOCKER_IMAGE}" -) 
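
The deleted team_keys_lambda above served its response from an in-memory cache refreshed at most once per hour; the same pattern in isolation, with a dummy fetch standing in for the worker threads that downloaded each member's github.com keys:

```python
from datetime import datetime


class Keys(set):
    """A set of SSH keys that remembers when it was last refreshed."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.updated_at = 0.0

    def update_now(self):
        self.updated_at = datetime.now().timestamp()


keys = Keys()


def fetch_keys_somehow() -> set:
    # Dummy stand-in for the threaded per-member key downloads.
    return {"# example-user\nssh-ed25519 AAAA example\n"}


def get_cached_members_keys() -> Keys:
    # Serve the cached result for up to one hour, as the lambda handler did.
    if (datetime.now().timestamp() - 3600) <= keys.updated_at:
        return keys
    keys.clear()
    keys.update(fetch_keys_somehow())
    keys.update_now()
    return keys


print("".join(sorted(get_cached_members_keys())))
```
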
-rm -rf "$PACKAGE" "$PACKAGE".zip -mkdir "$PACKAGE" -cp app.py "$PACKAGE" -if [ -f requirements.txt ]; then - VENV=lambda-venv - rm -rf "$VENV" - "${docker_cmd[@]}" -ex < dict: - """return cached instances description with updating it once per five minutes""" - if time.time() - 250 < cached_instances.time or cached_instances.updating: - return cached_instances.value - cached_instances.updating = cached_value_is_valid(cached_instances.time, 300) - ec2_client = boto3.client("ec2") - instances_response = ec2_client.describe_instances( - Filters=[{"Name": "instance-state-name", "Values": ["running"]}] - ) - cached_instances.time = time.time() - cached_instances.value = { - instance["InstanceId"]: instance - for reservation in instances_response["Reservations"] - for instance in reservation["Instances"] - } - cached_instances.updating = False - return cached_instances.value - - -@dataclass -class CachedRunners: - time: float - value: RunnerDescriptions - updating: bool = False - - -cached_runners = CachedRunners(0, []) - - -def get_cached_runners(access_token: str) -> RunnerDescriptions: - """From time to time request to GH api costs up to 3 seconds, and - it's a disaster from the termination lambda perspective""" - if time.time() - 5 < cached_runners.time or cached_instances.updating: - return cached_runners.value - cached_runners.updating = cached_value_is_valid(cached_runners.time, 15) - cached_runners.value = list_runners(access_token) - cached_runners.time = time.time() - cached_runners.updating = False - return cached_runners.value - - -def how_many_instances_to_kill(event_data: dict) -> Dict[str, int]: - data_array = event_data["CapacityToTerminate"] - to_kill_by_zone = {} # type: Dict[str, int] - for av_zone in data_array: - zone_name = av_zone["AvailabilityZone"] - to_kill = av_zone["Capacity"] - if zone_name not in to_kill_by_zone: - to_kill_by_zone[zone_name] = 0 - - to_kill_by_zone[zone_name] += to_kill - - return to_kill_by_zone - - -def get_candidates_to_be_killed(event_data: dict) -> Dict[str, List[str]]: - data_array = event_data["Instances"] - instances_by_zone = {} # type: Dict[str, List[str]] - for instance in data_array: - zone_name = instance["AvailabilityZone"] - instance_id = instance["InstanceId"] # type: str - if zone_name not in instances_by_zone: - instances_by_zone[zone_name] = [] - instances_by_zone[zone_name].append(instance_id) - - return instances_by_zone - - -def main(access_token: str, event: dict) -> Dict[str, List[str]]: - start = time.time() - print("Got event", json.dumps(event, sort_keys=True).replace("\n", "")) - to_kill_by_zone = how_many_instances_to_kill(event) - instances_by_zone = get_candidates_to_be_killed(event) - # Getting ASG and instances' descriptions from the API - # We don't kill instances that alive for less than 10 minutes, since they - # could be not in the GH active runners yet - print(f"Check other hosts from the same ASG {event['AutoScalingGroupName']}") - asg_client = boto3.client("autoscaling") - as_groups_response = asg_client.describe_auto_scaling_groups( - AutoScalingGroupNames=[event["AutoScalingGroupName"]] - ) - assert len(as_groups_response["AutoScalingGroups"]) == 1 - asg = as_groups_response["AutoScalingGroups"][0] - asg_instance_ids = [instance["InstanceId"] for instance in asg["Instances"]] - instance_descriptions = get_cached_instances() - # The instances launched less than 10 minutes ago - immune_ids = [ - instance["InstanceId"] - for instance in instance_descriptions.values() - if start - 
instance["LaunchTime"].timestamp() < 600 - ] - # if the ASG's instance ID not in instance_descriptions, it's most probably - # is not cached yet, so we must mark it as immuned - immune_ids.extend( - iid for iid in asg_instance_ids if iid not in instance_descriptions - ) - print("Time spent on the requests to AWS: ", time.time() - start) - - runners = get_cached_runners(access_token) - runner_ids = set(runner.name for runner in runners) - # We used to delete potential hosts to terminate from GitHub runners pool, - # but the documentation states: - # --- Returning an instance first in the response data does not guarantee its termination - # so they will be cleaned out by ci_runners_metrics_lambda eventually - - instances_to_kill = [] - total_to_kill = 0 - for zone, num_to_kill in to_kill_by_zone.items(): - candidates = instances_by_zone[zone] - total_to_kill += num_to_kill - if num_to_kill > len(candidates): - raise RuntimeError( - f"Required to kill {num_to_kill}, but have only {len(candidates)}" - f" candidates in AV {zone}" - ) - - delete_for_av = [] # type: RunnerDescriptions - for candidate in candidates: - if candidate in immune_ids: - print( - f"Candidate {candidate} started less than 10 minutes ago, won't touch a child" - ) - break - if candidate not in runner_ids: - print( - f"Candidate {candidate} was not in runners list, simply delete it" - ) - instances_to_kill.append(candidate) - break - if len(delete_for_av) + len(instances_to_kill) == num_to_kill: - break - if candidate in instances_to_kill: - continue - - for runner in runners: - if runner.name == candidate: - if not runner.busy: - print( - f"Runner {runner.name} is not busy and can be deleted from AV {zone}" - ) - delete_for_av.append(runner) - else: - print(f"Runner {runner.name} is busy, not going to delete it") - break - - if len(delete_for_av) < num_to_kill: - print( - f"Checked all candidates for av {zone}, get to delete " - f"{len(delete_for_av)}, but still cannot get required {num_to_kill}" - ) - - instances_to_kill += [runner.name for runner in delete_for_av] - - if len(instances_to_kill) < total_to_kill: - for instance in asg_instance_ids: - if instance in immune_ids: - continue - for runner in runners: - if runner.name == instance and not runner.busy: - print(f"Runner {runner.name} is not busy and can be deleted") - instances_to_kill.append(runner.name) - - if total_to_kill <= len(instances_to_kill): - print("Got enough instances to kill") - break - - response = {"InstanceIDs": instances_to_kill} - print("Got instances to kill: ", response) - print("Time spent on the request: ", time.time() - start) - return response - - -def handler(event: dict, context: Any) -> Dict[str, List[str]]: - _ = context - return main(get_cached_access_token(), event) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Get list of runners and their states") - parser.add_argument( - "-p", "--private-key-path", help="Path to file with private key" - ) - parser.add_argument("-k", "--private-key", help="Private key") - parser.add_argument( - "-a", "--app-id", type=int, help="GitHub application ID", required=True - ) - - args = parser.parse_args() - - if not args.private_key_path and not args.private_key: - print( - "Either --private-key-path or --private-key must be specified", - file=sys.stderr, - ) - - if args.private_key_path and args.private_key: - print( - "Either --private-key-path or --private-key must be specified", - file=sys.stderr, - ) - - if args.private_key: - private_key = args.private_key - else: - 
with open(args.private_key_path, "r", encoding="utf-8") as key_file: - private_key = key_file.read() - - token = get_access_token_by_key_app(private_key, args.app_id) - - sample_event = { - "AutoScalingGroupARN": "arn:aws:autoscaling:us-east-1::autoScalingGroup:d4738357-2d40-4038-ae7e-b00ae0227003:autoScalingGroupName/my-asg", - "AutoScalingGroupName": "my-asg", - "CapacityToTerminate": [ - { - "AvailabilityZone": "us-east-1b", - "Capacity": 1, - "InstanceMarketOption": "OnDemand", - }, - { - "AvailabilityZone": "us-east-1c", - "Capacity": 2, - "InstanceMarketOption": "OnDemand", - }, - ], - "Instances": [ - { - "AvailabilityZone": "us-east-1b", - "InstanceId": "i-08d0b3c1a137e02a5", - "InstanceType": "t2.nano", - "InstanceMarketOption": "OnDemand", - }, - { - "AvailabilityZone": "us-east-1c", - "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", - "InstanceType": "t2.nano", - "InstanceMarketOption": "OnDemand", - }, - { - "AvailabilityZone": "us-east-1c", - "InstanceId": "ip-172-31-27-227.eu-west-1.compute.internal", - "InstanceType": "t2.nano", - "InstanceMarketOption": "OnDemand", - }, - { - "AvailabilityZone": "us-east-1c", - "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", - "InstanceType": "t2.nano", - "InstanceMarketOption": "OnDemand", - }, - ], - "Cause": "SCALE_IN", - } - - main(token, sample_event) diff --git a/tests/ci/terminate_runner_lambda/build_and_deploy_archive.sh b/tests/ci/terminate_runner_lambda/build_and_deploy_archive.sh deleted file mode 120000 index 96ba3fa024e..00000000000 --- a/tests/ci/terminate_runner_lambda/build_and_deploy_archive.sh +++ /dev/null @@ -1 +0,0 @@ -../team_keys_lambda/build_and_deploy_archive.sh \ No newline at end of file diff --git a/tests/ci/terminate_runner_lambda/lambda_shared b/tests/ci/terminate_runner_lambda/lambda_shared deleted file mode 120000 index ba86e090f6c..00000000000 --- a/tests/ci/terminate_runner_lambda/lambda_shared +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/terminate_runner_lambda/requirements.txt b/tests/ci/terminate_runner_lambda/requirements.txt deleted file mode 100644 index 4cb3fba0f7b..00000000000 --- a/tests/ci/terminate_runner_lambda/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package[token] diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index e12a67bfc92..44142050821 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 - +import copy import unittest import random @@ -283,6 +283,7 @@ class TestCIConfig(unittest.TestCase): ci_cache = CIPY._configure_jobs( S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True ) + ci_cache.filter_out_not_affected_jobs() actual_jobs_to_do = list(ci_cache.jobs_to_do) expected_jobs_to_do = [] for set_ in settings.ci_sets: @@ -294,6 +295,13 @@ class TestCIConfig(unittest.TestCase): continue expected_jobs_to_do.append(job) for job, config in CI.JOB_CONFIGS.items(): + if ( + CI.is_build_job(job) + and not config.run_by_label + and job not in expected_jobs_to_do + ): + # expected to run all builds jobs + expected_jobs_to_do.append(job) if not any( keyword in normalize_string(job) for keyword in settings.include_keywords @@ -415,6 +423,30 @@ class TestCIConfig(unittest.TestCase): """ checks ci.py job configuration """ + + def _reset_ci_cache_to_wait_all_jobs(ci_cache): + # pretend there are pending jobs that we need to wait + ci_cache.jobs_to_wait = dict(ci_cache.jobs_to_do) + for job, config 
in ci_cache.jobs_to_wait.items(): + assert config.batches + config.pending_batches = list(config.batches) + + for batch in range(config.num_batches): + record = CiCache.Record( + record_type=CiCache.RecordType.PENDING, + job_name=job, + job_digest=ci_cache.job_digests[job], + batch=batch, + num_batches=config.num_batches, + release_branch=True, + ) + for record_t_, records_ in ci_cache.records.items(): + if record_t_.value == CiCache.RecordType.PENDING.value: + records_[record.to_str_key()] = record + assert not ci_cache.jobs_to_skip + assert ci_cache.jobs_to_wait + ci_cache.jobs_to_skip = [] + settings = CiSettings() settings.no_ci_cache = True pr_info = PRInfo(github_event=_TEST_EVENT_JSON) @@ -431,26 +463,6 @@ class TestCIConfig(unittest.TestCase): assert not ci_cache.jobs_to_skip assert not ci_cache.jobs_to_wait - # pretend there are pending jobs that we need to wait - ci_cache.jobs_to_wait = dict(ci_cache.jobs_to_do) - for job, config in ci_cache.jobs_to_wait.items(): - assert not config.pending_batches - assert config.batches - config.pending_batches = list(config.batches) - for job, config in ci_cache.jobs_to_wait.items(): - for batch in range(config.num_batches): - record = CiCache.Record( - record_type=CiCache.RecordType.PENDING, - job_name=job, - job_digest=ci_cache.job_digests[job], - batch=batch, - num_batches=config.num_batches, - release_branch=True, - ) - for record_t_, records_ in ci_cache.records.items(): - if record_t_.value == CiCache.RecordType.PENDING.value: - records_[record.to_str_key()] = record - def _test_await_for_batch( ci_cache: CiCache, record_type: CiCache.RecordType, batch: int ) -> None: @@ -476,32 +488,76 @@ class TestCIConfig(unittest.TestCase): and batch < config_.num_batches ): assert batch not in config_.pending_batches - else: - assert batch in config_.pending_batches for _, config_ in ci_cache.jobs_to_do.items(): # jobs to do must have batches to run before/after await # if it's an empty list after await - apparently job has not been removed after await assert config_.batches - _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 0) - # check all one-batch jobs are in jobs_to_skip - for job in all_jobs_in_wf: - config = CI.JOB_CONFIGS[job] - if config.num_batches == 1: - self.assertTrue(job in ci_cache.jobs_to_skip) - self.assertTrue(job not in ci_cache.jobs_to_do) - else: - self.assertTrue(job not in ci_cache.jobs_to_skip) - self.assertTrue(job in ci_cache.jobs_to_do) - - _test_await_for_batch(ci_cache, CiCache.RecordType.FAILED, 1) - _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 2) - - self.assertTrue(len(ci_cache.jobs_to_skip) > 0) - self.assertTrue(len(ci_cache.jobs_to_do) > 0) + _reset_ci_cache_to_wait_all_jobs(ci_cache) + _test_await_for_batch(ci_cache, CiCache.RecordType.FAILED, 0) + tested = False + for job, config in ci_cache.jobs_to_do.items(): + if config.batches == [0]: + tested = True + self.assertTrue( + job not in ci_cache.jobs_to_wait, + "Job must be removed from @jobs_to_wait, because its only batch has FAILED cache record", + ) self.assertCountEqual( - list(ci_cache.jobs_to_do) + ci_cache.jobs_to_skip, all_jobs_in_wf + ci_cache.jobs_to_skip, + [], + "No jobs must be skipped, since all cache records are of type FAILED", + ) + assert tested + + # reset jobs_to_wait after previous test + _reset_ci_cache_to_wait_all_jobs(ci_cache) + assert not ci_cache.jobs_to_skip + + # set batch 0 as SUCCESSFUL in ci cache + jobs_to_do_prev = list(ci_cache.jobs_to_do) + jobs_to_skip_prev = [] + jobs_to_wait_prev = 
list(ci_cache.jobs_to_wait) + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 0) + self.assertTrue(len(jobs_to_skip_prev) != len(ci_cache.jobs_to_skip)) + self.assertTrue(len(jobs_to_wait_prev) > len(ci_cache.jobs_to_wait)) + self.assertCountEqual( + list(ci_cache.jobs_to_do) + ci_cache.jobs_to_skip, + jobs_to_do_prev + jobs_to_skip_prev, + ) + + # set batch 1 as SUCCESSFUL in ci cache + jobs_to_do_prev = list(ci_cache.jobs_to_do) + jobs_to_skip_prev = list(ci_cache.jobs_to_skip) + jobs_to_wait_prev = list(ci_cache.jobs_to_wait) + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 1) + self.assertTrue(len(jobs_to_skip_prev) != len(ci_cache.jobs_to_skip)) + self.assertTrue(len(jobs_to_wait_prev) > len(ci_cache.jobs_to_wait)) + self.assertCountEqual( + list(ci_cache.jobs_to_do) + ci_cache.jobs_to_skip, + jobs_to_do_prev + jobs_to_skip_prev, + ) + + # set batches 2, 3, 4, 5 as SUCCESSFUL in ci cache + jobs_to_do_prev = list(ci_cache.jobs_to_do) + jobs_to_skip_prev = list(ci_cache.jobs_to_skip) + jobs_to_wait_prev = list(ci_cache.jobs_to_wait) + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 2) + self.assertTrue(ci_cache.jobs_to_do) + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 3) + self.assertTrue(ci_cache.jobs_to_do) + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 4) + self.assertTrue(ci_cache.jobs_to_do) + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 5) + self.assertTrue( + not ci_cache.jobs_to_do + ) # by this moment there must be no jobs left as batch 5 is currently the maximum + self.assertTrue(len(jobs_to_skip_prev) != len(ci_cache.jobs_to_skip)) + self.assertTrue(len(jobs_to_wait_prev) > len(ci_cache.jobs_to_wait)) + self.assertCountEqual( + list(ci_cache.jobs_to_do) + ci_cache.jobs_to_skip, + jobs_to_do_prev + jobs_to_skip_prev, ) def test_ci_py_filters_not_affected_jobs_in_prs(self): @@ -531,11 +587,11 @@ class TestCIConfig(unittest.TestCase): for job, job_config in ci_cache.jobs_to_do.items(): if job in MOCK_AFFECTED_JOBS: MOCK_REQUIRED_BUILDS += job_config.required_builds - elif job not in MOCK_AFFECTED_JOBS: + elif job not in MOCK_AFFECTED_JOBS and not job_config.disable_await: ci_cache.jobs_to_wait[job] = job_config for job, job_config in ci_cache.jobs_to_do.items(): - if job_config.reference_job_name: + if job_config.reference_job_name or job_config.disable_await: # jobs with reference_job_name in config are not supposed to have records in the cache - continue continue if job in MOCK_AFFECTED_JOBS: @@ -568,11 +624,76 @@ class TestCIConfig(unittest.TestCase): + MOCK_AFFECTED_JOBS + MOCK_REQUIRED_BUILDS ) + self.assertTrue( + CI.JobNames.BUILD_CHECK not in ci_cache.jobs_to_wait, + "We must never await on Builds Report", + ) self.assertCountEqual( + list(ci_cache.jobs_to_wait), - [ - CI.JobNames.BUILD_CHECK, - ] - + MOCK_REQUIRED_BUILDS, + MOCK_REQUIRED_BUILDS, + ) + self.assertCountEqual(list(ci_cache.jobs_to_do), expected_to_do) + + def test_ci_py_filters_not_affected_jobs_in_prs_no_builds(self): + """ + checks ci.py filters not affected jobs in PRs, no builds required + """ + settings = CiSettings() + settings.no_ci_cache = True + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + pr_info.event_type = EventType.PULL_REQUEST + pr_info.number = 123 + assert pr_info.is_pr + ci_cache = CIPY._configure_jobs( + S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True + ) + self.assertTrue(not ci_cache.jobs_to_skip, "Must be no jobs in skip list") + assert not ci_cache.jobs_to_wait + assert 
not ci_cache.jobs_to_skip + + MOCK_AFFECTED_JOBS = [ + CI.JobNames.FAST_TEST, + ] + MOCK_REQUIRED_BUILDS = [] + + # pretend there are pending jobs that we need to wait + for job, job_config in ci_cache.jobs_to_do.items(): + if job in MOCK_AFFECTED_JOBS: + if job_config.required_builds: + MOCK_REQUIRED_BUILDS += job_config.required_builds + elif job not in MOCK_AFFECTED_JOBS and not job_config.disable_await: + ci_cache.jobs_to_wait[job] = job_config + + for job, job_config in ci_cache.jobs_to_do.items(): + if job_config.reference_job_name or job_config.disable_await: + # jobs with reference_job_name in config are not supposed to have records in the cache - continue + continue + if job in MOCK_AFFECTED_JOBS: + continue + for batch in range(job_config.num_batches): + # add any record into cache + record = CiCache.Record( + record_type=random.choice( + [ + CiCache.RecordType.FAILED, + CiCache.RecordType.PENDING, + CiCache.RecordType.SUCCESSFUL, + ] + ), + job_name=job, + job_digest=ci_cache.job_digests[job], + batch=batch, + num_batches=job_config.num_batches, + release_branch=True, + ) + for record_t_, records_ in ci_cache.records.items(): + if record_t_.value == CiCache.RecordType.FAILED.value: + records_[record.to_str_key()] = record + + ci_cache.filter_out_not_affected_jobs() + expected_to_do = MOCK_AFFECTED_JOBS + MOCK_REQUIRED_BUILDS + self.assertCountEqual( + list(ci_cache.jobs_to_wait), + MOCK_REQUIRED_BUILDS, ) self.assertCountEqual(list(ci_cache.jobs_to_do), expected_to_do) diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index f4d14a17512..534885dda2b 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -8,8 +8,8 @@ from ci_config import CI _TEST_BODY_1 = """ #### Run only: -- [x] Non required -- [ ] Integration tests (arm64) +- [ ] Some Set +- [x] Integration tests (arm64) - [x] Integration tests - [x] Integration tests - [ ] Integration tests @@ -149,7 +149,7 @@ class TestCIOptions(unittest.TestCase): self.assertFalse(ci_options.no_ci_cache) self.assertTrue(ci_options.no_merge_commit) self.assertTrue(ci_options.woolen_wolfdog) - self.assertEqual(ci_options.ci_sets, ["ci_set_non_required"]) + self.assertEqual(ci_options.ci_sets, ["ci_set_arm"]) self.assertCountEqual(ci_options.include_keywords, ["foo", "foo_bar"]) self.assertCountEqual(ci_options.exclude_keywords, ["foo", "foo_bar"]) @@ -197,6 +197,10 @@ class TestCIOptions(unittest.TestCase): "package_debug", "package_msan", "package_ubsan", + "package_aarch64", + "package_release_coverage", + "package_tsan", + "binary_release", "Stateless tests (asan)", "Stateless tests (azure, asan)", "Stateless tests flaky check (asan)", @@ -276,6 +280,7 @@ class TestCIOptions(unittest.TestCase): filtered_jobs, [ "Style check", + "fuzzers", ], ) @@ -291,9 +296,7 @@ class TestCIOptions(unittest.TestCase): ) self.assertCountEqual( filtered_jobs, - [ - "Style check", - ], + ["Style check", "fuzzers"], ) def test_options_applied_4(self): @@ -329,5 +332,12 @@ class TestCIOptions(unittest.TestCase): "Stateless tests (release, old analyzer, s3, DatabaseReplicated)", "package_asan", "fuzzers", + "package_aarch64", + "package_release_coverage", + "package_debug", + "package_tsan", + "package_msan", + "package_ubsan", + "binary_release", ], ) diff --git a/tests/ci/worker/.gitignore b/tests/ci/worker/.gitignore deleted file mode 100644 index 4ed18989e78..00000000000 --- a/tests/ci/worker/.gitignore +++ /dev/null @@ -1 +0,0 @@ -generated_*init_runner.sh diff --git a/tests/ci/worker/deploy-runner-init.sh 
b/tests/ci/worker/deploy-runner-init.sh deleted file mode 100755 index 06edede48fa..00000000000 --- a/tests/ci/worker/deploy-runner-init.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env bash - -usage() { - echo "Usage: $0 ENVIRONMENT" >&2 - echo "Valid values for ENVIRONMENT: staging, production" >&2 - exit 1 -} - -case "$1" in - staging|production) - ENVIRONMENT="$1" ;; - --help) - usage ;; - *) - echo "Invalid argument" >&2 - usage ;; -esac - -cd "$(dirname "$0")" || exit 1 -SOURCE_SCRIPT='init_runner.sh' - -check_response() { - # Are we even in the interactive shell? - [ -t 1 ] || return 1 - local request - request="$1" - read -rp "$request (y/N): " response - case "$response" in - [Yy]) - return 0 - # Your code to continue goes here - ;; - *) - return 1 - ;; - esac -} - -check_dirty() { - if [ -n "$(git status --porcelain=v2 "$SOURCE_SCRIPT")" ]; then - echo "The $SOURCE_SCRIPT has uncommited changes, won't deploy it" >&2 - exit 1 - fi -} -GIT_HASH=$(git log -1 --format=format:%H) - -header() { - cat << EOF -#!/usr/bin/env bash - -echo 'The $ENVIRONMENT script is generated from $SOURCE_SCRIPT, commit $GIT_HASH' - -EOF -} - -body() { - local first_line - first_line=$(sed -n '/^# THE SCRIPT START$/{=;q}' "$SOURCE_SCRIPT") - if [ -z "$first_line" ]; then - echo "The pattern '# THE SCRIPT START' is not found in $SOURCE_SCRIPT" >&2 - exit 1 - fi - tail "+$first_line" "$SOURCE_SCRIPT" -} - -GENERATED_FILE="generated_${ENVIRONMENT}_${SOURCE_SCRIPT}" - -{ header && body; } > "$GENERATED_FILE" - -echo "The file $GENERATED_FILE is generated" - -if check_response "Display the content of $GENERATED_FILE?"; then - if [ -z "$PAGER" ]; then - less "$GENERATED_FILE" - else - $PAGER "$GENERATED_FILE" - fi -fi - -check_dirty - -S3_OBJECT=${S3_OBJECT:-s3://github-runners-data/cloud-init/${ENVIRONMENT}.sh} -if check_response "Deploy the generated script to $S3_OBJECT?"; then - aws s3 mv "$GENERATED_FILE" "$S3_OBJECT" -fi diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh deleted file mode 100644 index ec7793db2aa..00000000000 --- a/tests/ci/worker/init_runner.sh +++ /dev/null @@ -1,397 +0,0 @@ -#!/usr/bin/env bash - -cat > /dev/null << 'EOF' -The following content is embedded into the s3 object via the script -deploy-runner-init.sh {staging,production} -with additional helping information - -In the `user data` you should define as the following text -between `### COPY BELOW` and `### COPY ABOVE` - -### COPY BELOW -Content-Type: multipart/mixed; boundary="//" -MIME-Version: 1.0 - ---// -Content-Type: text/cloud-config; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Content-Disposition: attachment; filename="cloud-config.txt" - -#cloud-config -cloud_final_modules: -- [scripts-user, always] - ---// -Content-Type: text/x-shellscript; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Content-Disposition: attachment; filename="userdata.txt" - -#!/bin/bash -INSTANCE_ID=$(ec2metadata --instance-id) -INIT_ENVIRONMENT=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:init-environment'].Value" --output text) -echo "Downloading and using $INIT_ENVIRONMENT cloud-init.sh" -aws s3 cp "s3://github-runners-data/cloud-init/${INIT_ENVIRONMENT:-production}.sh" /tmp/cloud-init.sh -chmod 0700 /tmp/cloud-init.sh -exec bash /tmp/cloud-init.sh ---// -### COPY ABOVE -EOF - -# THE SCRIPT START - -set -uo pipefail - -#################################### -# IMPORTANT! 
# -# EC2 instance should have # -# `github:runner-type` tag # -# set accordingly to a runner role # -#################################### - -echo "Running init script" -export DEBIAN_FRONTEND=noninteractive -export RUNNER_HOME=/home/ubuntu/actions-runner - -export RUNNER_ORG="ClickHouse" -export RUNNER_URL="https://github.com/${RUNNER_ORG}" -# Funny fact, but metadata service has fixed IP -INSTANCE_ID=$(ec2metadata --instance-id) -export INSTANCE_ID - -bash /usr/local/share/scripts/init-network.sh - -# combine labels -RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:runner-type'].Value" --output text) -LABELS="self-hosted,Linux,$(uname -m),$RUNNER_TYPE" -export LABELS - -# Refresh CloudWatch agent config -aws ssm get-parameter --region us-east-1 --name AmazonCloudWatch-github-runners --query 'Parameter.Value' --output text > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -systemctl restart amazon-cloudwatch-agent.service - -# Refresh teams ssh keys -TEAM_KEYS_URL=$(aws ssm get-parameter --region us-east-1 --name team-keys-url --query 'Parameter.Value' --output=text) -curl -s "${TEAM_KEYS_URL}" > /home/ubuntu/.ssh/authorized_keys2 -chown ubuntu: /home/ubuntu/.ssh -R - - -# Create a pre-run script that will provide diagnostics info -mkdir -p /tmp/actions-hooks -cat > /tmp/actions-hooks/common.sh << 'EOF' -#!/bin/bash -EOF - -terminate_delayed() { - # The function for post hook to gracefully finish the job and then tear down - # The very specific sleep time is used later to determine in the main loop if - # the instance is tearing down - # IF `sleep` IS CHANGED, CHANGE ANOTHER VALUE IN `pgrep` - sleep=13.14159265358979323846 - echo "Going to terminate the runner's instance in $sleep seconds" - INSTANCE_ID=$(ec2metadata --instance-id) - # We execute it with `at` to not have it as an orphan process, but launched independently - # GH Runners kill all remain processes - echo "sleep '$sleep'; aws ec2 terminate-instances --instance-ids $INSTANCE_ID" | at now || \ - aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" # workaround for complete out of space or non-installed `at` - exit 0 -} - -detect_delayed_termination() { - # The function look for very specific sleep with pi - if pgrep 'sleep 13.14159265358979323846'; then - echo 'The instance has delayed termination, sleep the same time to wait if it goes down' - sleep 14 - fi -} - -declare -f terminate_delayed >> /tmp/actions-hooks/common.sh - -terminate_and_exit() { - # Terminate instance and exit from the script instantly - echo "Going to terminate the runner's instance" - INSTANCE_ID=$(ec2metadata --instance-id) - aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" - exit 0 -} - -declare -f terminate_and_exit >> /tmp/actions-hooks/common.sh - -check_spot_instance_is_old() { - # This function should be executed ONLY BETWEEN runnings. - # It's unsafe to execute while the runner is working! 
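# A minimal sketch of the delayed-termination pattern used by terminate_delayed and
# detect_delayed_termination above: the command is handed over to `at`, so it is not a child of
# the runner and survives when GitHub Actions kills the job's process tree, and an unusual sleep
# value doubles as a marker that a later check can look for. Function names and messages below
# are illustrative only, not part of the original script.
example_delayed_action() {
    local marker_sleep=13.14159265358979323846
    echo "sleep '$marker_sleep'; echo 'the real script terminates the instance here'" | at now \
        || echo "'at' is unavailable, the real script falls back to terminating directly"
}

example_detect_delayed_action() {
    # mirrors detect_delayed_termination: if the marker sleep is running, the action is already scheduled
    pgrep -f 'sleep 13\.14159265358979323846' > /dev/null && echo "delayed action already scheduled"
}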
- local LIFE_CYCLE - LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle) - if [ "$LIFE_CYCLE" == "spot" ]; then - local UPTIME - UPTIME=$(< /proc/uptime) - UPTIME=${UPTIME%%.*} - if (( 3600 < UPTIME )); then - echo "The spot instance has uptime $UPTIME, it's time to shut it down" - return 0 - fi - fi - return 1 -} - -check_proceed_spot_termination() { - # The function checks and proceeds spot instance termination if exists - # The event for spot instance termination - local FORCE - FORCE=${1:-} - if TERMINATION_DATA=$(curl -s --fail http://169.254.169.254/latest/meta-data/spot/instance-action); then - # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-instance-termination-notices.html#instance-action-metadata - _action=$(jq '.action' -r <<< "$TERMINATION_DATA") - _time=$(jq '.time | fromdate' <<< "$TERMINATION_DATA") - _until_action=$((_time - $(date +%s))) - echo "Received the '$_action' event that will be effective in $_until_action seconds" - if (( _until_action <= 30 )) || [ "$FORCE" == "force" ]; then - echo "The action $_action will be done in $_until_action, killing the runner and exit" - local runner_pid - runner_pid=$(pgrep Runner.Listener) - if [ -n "$runner_pid" ]; then - # Kill the runner to not allow it cancelling the job - # shellcheck disable=SC2046 - kill -9 "$runner_pid" $(list_children "$runner_pid") - fi - sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" - terminate_and_exit - fi - fi -} - -no_terminating_metadata() { - # The function check that instance could continue work - # Returns 1 if any of termination events are received - - # The event for rebalance recommendation. Not strict, so we have some room to make a decision here - if curl -s --fail http://169.254.169.254/latest/meta-data/events/recommendations/rebalance; then - echo 'Received recommendation to rebalance, checking the uptime' - local UPTIME - UPTIME=$(< /proc/uptime) - UPTIME=${UPTIME%%.*} - # We don't shutdown the instances younger than 30m - if (( 1800 < UPTIME )); then - # To not shutdown everything at once, use the 66% to survive - if (( $((RANDOM % 3)) == 0 )); then - echo 'The instance is older than 30m and won the roulette' - return 1 - fi - echo 'The instance is older than 30m, but is not chosen for rebalance' - else - echo 'The instance is younger than 30m, do not shut it down' - fi - fi - - # Checks if the ASG in a lifecycle hook state - local ASG_STATUS - ASG_STATUS=$(curl -s http://169.254.169.254/latest/meta-data/autoscaling/target-lifecycle-state) - if [ "$ASG_STATUS" == "Terminated" ]; then - echo 'The instance in ASG status Terminating:Wait' - return 1 - fi -} - -terminate_on_event() { - # If there is a rebalance event, then the instance could die soon - # Let's don't wait for it and terminate proactively - if curl -s --fail http://169.254.169.254/latest/meta-data/events/recommendations/rebalance; then - terminate_and_exit - fi - - # Here we check if the autoscaling group marked the instance for termination, and it's wait for the job to finish - ASG_STATUS=$(curl -s http://169.254.169.254/latest/meta-data/autoscaling/target-lifecycle-state) - if [ "$ASG_STATUS" == "Terminated" ]; then - INSTANCE_ID=$(ec2metadata --instance-id) - ASG_NAME=$(aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='aws:autoscaling:groupName'].Value" --output text) - LIFECYCLE_HOOKS=$(aws autoscaling describe-lifecycle-hooks --auto-scaling-group-name "$ASG_NAME" --query "LifecycleHooks[].LifecycleHookName" --output 
text) - for LCH in $LIFECYCLE_HOOKS; do - aws autoscaling complete-lifecycle-action --lifecycle-action-result CONTINUE \ - --lifecycle-hook-name "$LCH" --auto-scaling-group-name "$ASG_NAME" \ - --instance-id "$INSTANCE_ID" - true # autoformat issue - done - echo 'The runner is marked as "Terminated" by the autoscaling group, we are terminating' - terminate_and_exit - fi -} - -cat > /tmp/actions-hooks/pre-run.sh << EOF -#!/bin/bash -set -uo pipefail - -echo "Runner's public DNS: $(ec2metadata --public-hostname)" -echo "Runner's labels: ${LABELS}" -echo "Runner's instance type: $(ec2metadata --instance-type)" -EOF - -# Create a post-run script that will restart docker daemon before the job started -cat > /tmp/actions-hooks/post-run.sh << 'EOF' -#!/bin/bash -set -xuo pipefail - -source /tmp/actions-hooks/common.sh - -# Free KiB, free percents -ROOT_STAT=($(df / | awk '/\// {print $4 " " int($4/$2 * 100)}')) -if [[ ${ROOT_STAT[0]} -lt 3000000 ]] || [[ ${ROOT_STAT[1]} -lt 5 ]]; then - echo "The runner has ${ROOT_STAT[0]}KiB and ${ROOT_STAT[1]}% of free space on /" - terminate_delayed -fi - -# shellcheck disable=SC2046 -docker ps --quiet | xargs --no-run-if-empty docker kill ||: -# shellcheck disable=SC2046 -docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - -# If we have hanged containers after the previous commands, than we have a hanged one -# and should restart the daemon -if [ "$(docker ps --all --quiet)" ]; then - # Systemd service of docker has StartLimitBurst=3 and StartLimitInterval=60s, - # that's why we try restarting it for long - for i in {1..25}; - do - sudo systemctl restart docker && break || sleep 5 - done - - for i in {1..10} - do - docker info && break || sleep 2 - done - # Last chance, otherwise we have to terminate poor instance - docker info 1>/dev/null || { echo Docker unable to start; terminate_delayed ; } -fi -EOF - -get_runner_token() { - /usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value -} - -is_job_assigned() { - local runner_pid - runner_pid=$(pgrep Runner.Listener) - if [ -z "$runner_pid" ]; then - # if runner has finished, it's fine - return 0 - fi - local log_file - log_file=$(lsof -p "$runner_pid" 2>/dev/null | grep -o "$RUNNER_HOME/_diag/Runner.*log") - if [ -z "$log_file" ]; then - # assume, the process is over or just started - return 0 - fi - # So far it's the only solid way to determine that the job is starting - grep -q 'Terminal] .* Running job:' "$log_file" \ - && return 0 \ - || return 1 -} - -list_children () { - local children - children=$(ps --ppid "$1" -o pid=) - if [ -z "$children" ]; then - return - fi - - for pid in $children; do - list_children "$pid" - done - echo "$children" -} - -# There's possibility that it fails because the runner's version is outdated, -# so after the first failure we'll try to launch it with enabled autoupdate. -# -# We'll fail and terminate after 10 consequent failures. -ATTEMPT=0 -# In `kill` 0 means "all processes in process group", -1 is "all but PID 1" -# We use `-2` to get an error -RUNNER_PID=-2 - -while true; do - # Does not send signal, but checks that the process $RUNNER_PID is running - if kill -0 -- $RUNNER_PID; then - ATTEMPT=0 - echo "Runner is working with pid $RUNNER_PID, checking the metadata in background" - check_proceed_spot_termination - - if ! 
is_job_assigned; then - RUNNER_AGE=$(( $(date +%s) - $(stat -c +%Y /proc/"$RUNNER_PID" 2>/dev/null || date +%s) )) - echo "The runner is launched $RUNNER_AGE seconds ago and still hasn't received a job" - if (( 60 < RUNNER_AGE )); then - echo "Attempt to delete the runner for a graceful shutdown" - sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \ - || continue - echo "Runner didn't launch or have assigned jobs after ${RUNNER_AGE} seconds, shutting down" - terminate_and_exit - fi - fi - else - if [ "$RUNNER_PID" != "-2" ]; then - wait $RUNNER_PID \ - && echo "Runner with PID $RUNNER_PID successfully finished" \ - || echo "Attempt $((++ATTEMPT)) to start the runner" - fi - if (( ATTEMPT > 10 )); then - echo "The runner has failed to start after $ATTEMPT attempt. Give up and terminate it" - terminate_and_exit - fi - - cd $RUNNER_HOME || terminate_and_exit - detect_delayed_termination - # If runner is not active, check that it needs to terminate itself - echo "Checking if the instance suppose to terminate" - no_terminating_metadata || terminate_on_event - check_spot_instance_is_old && terminate_and_exit - check_proceed_spot_termination force - - echo "Going to configure runner" - token_args=(--token "$(get_runner_token)") - config_args=( - "${token_args[@]}" --url "$RUNNER_URL" - --ephemeral --unattended --replace --runnergroup Default - --labels "$LABELS" --work _work --name "$INSTANCE_ID" - ) - if (( ATTEMPT > 1 )); then - echo 'The runner failed to start at least once. Removing it and then configuring with autoupdate enabled.' - sudo -u ubuntu ./config.sh remove "${token_args[@]}" - sudo -u ubuntu ./config.sh "${config_args[@]}" - else - echo "Configure runner with disabled autoupdate" - config_args+=("--disableupdate") - sudo -u ubuntu ./config.sh "${config_args[@]}" - fi - - echo "Another one check to avoid race between runner and infrastructure" - no_terminating_metadata || terminate_on_event - check_spot_instance_is_old && terminate_and_exit - check_proceed_spot_termination force - - # There were some failures to start the Job because of trash in _work - rm -rf _work - - # https://github.com/actions/runner/issues/3266 - # We're unable to know if the runner is failed to start. - echo 'Monkey-patching run helpers to get genuine exit code of the runner' - for script in run.sh run-helper.sh.template; do - # shellcheck disable=SC2016 - grep -q 'exit 0$' "$script" && \ - sed 's/exit 0/exit $returnCode/' -i "$script" && \ - echo "Script $script is patched" - done - - echo "Run" - sudo -u ubuntu \ - ACTIONS_RUNNER_HOOK_JOB_STARTED=/tmp/actions-hooks/pre-run.sh \ - ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/tmp/actions-hooks/post-run.sh \ - ./run.sh & - RUNNER_PID=$! - - sleep 10 - fi - - sleep 5 -done - -# vim:ts=4:sw=4 diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py deleted file mode 100644 index 8ed47a54ab1..00000000000 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ /dev/null @@ -1,404 +0,0 @@ -#!/usr/bin/env python3 - -import fnmatch -import json -import time -from collections import namedtuple -from urllib.parse import quote - -import requests -from lambda_shared.pr import TRUSTED_CONTRIBUTORS -from lambda_shared.token import get_cached_access_token - -SUSPICIOUS_CHANGED_FILES_NUMBER = 200 - -SUSPICIOUS_PATTERNS = [ - ".github/*", - "docker/*", - "docs/tools/*", - "packages/*", - "tests/ci/*", -] - -# Number of retries for API calls. -MAX_RETRY = 5 - -# Number of times a check can re-run as a whole. 
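# A minimal sketch of how the SUSPICIOUS_PATTERNS globs above are applied to changed file paths;
# the full check, including the SUSPICIOUS_CHANGED_FILES_NUMBER limit, lives in
# check_suspicious_changed_files further down in this module. The helper name and the sample
# paths are illustrative only.
def _matches_suspicious_pattern_example(path: str) -> bool:
    import fnmatch  # fnmatch is also imported at the top of this module

    return any(fnmatch.fnmatch(path, pattern) for pattern in SUSPICIOUS_PATTERNS)


# _matches_suspicious_pattern_example("tests/ci/ci.py")         -> True, matches "tests/ci/*"
# _matches_suspicious_pattern_example("src/Common/Stopwatch.h") -> False, auto-approval remains possible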
-# It is needed, because we are using AWS "spot" instances, that are terminated often -MAX_WORKFLOW_RERUN = 30 - -WorkflowDescription = namedtuple( - "WorkflowDescription", - [ - "name", - "action", - "run_id", - "event", - "workflow_id", - "conclusion", - "status", - "api_url", - "fork_owner_login", - "fork_branch", - "rerun_url", - "jobs_url", - "attempt", - "repo_url", - "url", - ], -) - -# See https://api.github.com/orgs/{name} -TRUSTED_ORG_IDS = { - 54801242, # clickhouse -} - -# See https://api.github.com/repos/ClickHouse/ClickHouse/actions/workflows -# Use ID to not inject a malicious workflow -TRUSTED_WORKFLOW_IDS = { - 14586616, # Cancel workflows, always trusted -} - -NEED_RERUN_WORKFLOWS = { - "BackportPR", - "DocsCheck", - "MasterCI", - "NightlyBuilds", - "PublishedReleaseCI", - "PullRequestCI", - "ReleaseBranchCI", -} - - -def is_trusted_contributor(pr_user_login, pr_user_orgs): - if pr_user_login.lower() in TRUSTED_CONTRIBUTORS: - print(f"User '{pr_user_login}' is trusted") - return True - - print(f"User '{pr_user_login}' is not trusted") - - for org_id in pr_user_orgs: - if org_id in TRUSTED_ORG_IDS: - print( - f"Org '{org_id}' is trusted; will mark user {pr_user_login} as trusted" - ) - return True - print(f"Org '{org_id}' is not trusted") - - return False - - -def _exec_get_with_retry(url, token): - headers = {"Authorization": f"token {token}"} - e = Exception() - for i in range(MAX_RETRY): - try: - response = requests.get(url, headers=headers, timeout=30) - response.raise_for_status() - return response.json() - except Exception as ex: - print("Got exception executing request", ex) - e = ex - time.sleep(i + 1) - - raise requests.HTTPError("Cannot execute GET request with retries") from e - - -def _exec_post_with_retry(url, token, data=None): - headers = {"Authorization": f"token {token}"} - e = Exception() - for i in range(MAX_RETRY): - try: - if data: - response = requests.post(url, headers=headers, json=data, timeout=30) - else: - response = requests.post(url, headers=headers, timeout=30) - if response.status_code == 403: - data = response.json() - if ( - "message" in data - and data["message"] - == "This workflow run is not waiting for approval" - ): - print("Workflow doesn't need approval") - return data - response.raise_for_status() - return response.json() - except Exception as ex: - print("Got exception executing request", ex) - e = ex - time.sleep(i + 1) - - raise requests.HTTPError("Cannot execute POST request with retry") from e - - -def _get_pull_requests_from(repo_url, owner, branch, token): - url = f"{repo_url}/pulls?head={quote(owner)}:{quote(branch)}" - return _exec_get_with_retry(url, token) - - -def get_workflow_description_from_event(event): - action = event["action"] - run_id = event["workflow_run"]["id"] - event_type = event["workflow_run"]["event"] - fork_owner = event["workflow_run"]["head_repository"]["owner"]["login"] - fork_branch = event["workflow_run"]["head_branch"] - name = event["workflow_run"]["name"] - workflow_id = event["workflow_run"]["workflow_id"] - conclusion = event["workflow_run"]["conclusion"] - attempt = event["workflow_run"]["run_attempt"] - status = event["workflow_run"]["status"] - jobs_url = event["workflow_run"]["jobs_url"] - rerun_url = event["workflow_run"]["rerun_url"] - url = event["workflow_run"]["html_url"] - api_url = event["workflow_run"]["url"] - repo_url = event["repository"]["url"] - return WorkflowDescription( - name=name, - action=action, - run_id=run_id, - event=event_type, - fork_owner_login=fork_owner, - 
fork_branch=fork_branch, - workflow_id=workflow_id, - conclusion=conclusion, - attempt=attempt, - status=status, - jobs_url=jobs_url, - rerun_url=rerun_url, - url=url, - repo_url=repo_url, - api_url=api_url, - ) - - -def get_pr_author_and_orgs(pull_request, token): - author = pull_request["user"]["login"] - orgs = _exec_get_with_retry(pull_request["user"]["organizations_url"], token) - return author, [org["id"] for org in orgs] - - -def get_changed_files_for_pull_request(pull_request, token): - url = pull_request["url"] - - changed_files = set([]) - for i in range(1, 31): - print("Requesting changed files page", i) - data = _exec_get_with_retry(f"{url}/files?page={i}&per_page=100", token) - print(f"Got {len(data)} changed files") - if len(data) == 0: - print("No more changed files") - break - - for change in data: - # print("Adding changed file", change['filename']) - changed_files.add(change["filename"]) - - if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER: - print( - f"More than {len(changed_files)} changed files. " - "Will stop fetching new files." - ) - break - - return changed_files - - -def check_suspicious_changed_files(changed_files): - if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER: - print(f"Too many files changed {len(changed_files)}, need manual approve") - return True - - for path in changed_files: - for pattern in SUSPICIOUS_PATTERNS: - if fnmatch.fnmatch(path, pattern): - print( - f"File {path} match suspicious pattern {pattern}, " - "will not approve automatically" - ) - return True - - print("No changed files match suspicious patterns, run could be approved") - return False - - -def approve_run(workflow_description: WorkflowDescription, token: str) -> None: - print("Approving run") - url = f"{workflow_description.api_url}/approve" - _exec_post_with_retry(url, token) - - -def label_manual_approve(pull_request, token): - url = f"{pull_request['issue_url']}/labels" - data = {"labels": ["manual approve"]} - - _exec_post_with_retry(url, token, data) - - -def get_workflow_jobs(workflow_description, token): - jobs_url = ( - workflow_description.api_url + f"/attempts/{workflow_description.attempt}/jobs" - ) - jobs = [] - i = 1 - while True: - got_jobs = _exec_get_with_retry(jobs_url + f"?page={i}", token) - if len(got_jobs["jobs"]) == 0: - break - - jobs += got_jobs["jobs"] - i += 1 - - return jobs - - -def check_need_to_rerun(workflow_description, token): - if workflow_description.attempt >= MAX_WORKFLOW_RERUN: - print( - "Not going to rerun workflow because it's already tried more than two times" - ) - return False - print("Going to check jobs") - - jobs = get_workflow_jobs(workflow_description, token) - print("Got jobs", len(jobs)) - for job in jobs: - print(f"Job {job['name']} has a conclusion '{job['conclusion']}'") - if job["conclusion"] not in ("success", "skipped"): - print("Job", job["name"], "failed, checking steps") - for step in job["steps"]: - # always the last job - if step["name"] == "Complete job": - print("Found Complete job step for job", job["name"]) - break - else: - print( - "Checked all steps and doesn't found Complete job, going to rerun" - ) - return True - - return False - - -def rerun_workflow(workflow_description, token): - print("Going to rerun workflow") - try: - _exec_post_with_retry(f"{workflow_description.rerun_url}-failed-jobs", token) - except Exception: - _exec_post_with_retry(workflow_description.rerun_url, token) - - -def check_workflow_completed( - event_data: dict, workflow_description: WorkflowDescription, token: str -) -> 
bool: - if workflow_description.action == "completed": - attempt = 0 - # Nice and reliable GH API sends from time to time such events, e.g: - # action='completed', conclusion=None, status='in_progress', - # So let's try receiving a real workflow data - while workflow_description.conclusion is None and attempt < MAX_RETRY: - progressive_sleep = 3 * sum(i + 1 for i in range(attempt)) - time.sleep(progressive_sleep) - event_data["workflow_run"] = _exec_get_with_retry( - workflow_description.api_url, token - ) - workflow_description = get_workflow_description_from_event(event_data) - attempt += 1 - - if workflow_description.conclusion != "failure": - print( - "Workflow finished with status " - f"{workflow_description.conclusion}, exiting" - ) - return True - - print( - "Workflow", - workflow_description.url, - "completed and failed, let's check for rerun", - ) - - if workflow_description.name not in NEED_RERUN_WORKFLOWS: - print( - "Workflow", - workflow_description.name, - "not in list of rerunable workflows", - ) - return True - - if check_need_to_rerun(workflow_description, token): - rerun_workflow(workflow_description, token) - return True - - return False - - -def main(event): - token = get_cached_access_token() - event_data = json.loads(event["body"]) - print("The body received:", event["body"]) - workflow_description = get_workflow_description_from_event(event_data) - - print("Got workflow description", workflow_description) - if check_workflow_completed(event_data, workflow_description, token): - return - - if workflow_description.action != "requested": - print("Exiting, event action is", workflow_description.action) - return - - if workflow_description.workflow_id in TRUSTED_WORKFLOW_IDS: - print("Workflow in trusted list, approving run") - approve_run(workflow_description, token) - return - - pull_requests = _get_pull_requests_from( - workflow_description.repo_url, - workflow_description.fork_owner_login, - workflow_description.fork_branch, - token, - ) - - print("Got pull requests for workflow", len(pull_requests)) - if len(pull_requests) != 1: - print(f"Can't continue with non-uniq PRs: {pull_requests}") - return - - pull_request = pull_requests[0] - print("Pull request for workflow number", pull_request["number"]) - - author, author_orgs = get_pr_author_and_orgs(pull_request, token) - if is_trusted_contributor(author, author_orgs): - print("Contributor is trusted, approving run") - approve_run(workflow_description, token) - return - - labels = {label["name"] for label in pull_request["labels"]} - if "can be tested" not in labels: - print("Label 'can be tested' is required for untrusted users") - return - - changed_files = get_changed_files_for_pull_request(pull_request, token) - print(f"Totally have {len(changed_files)} changed files in PR:", changed_files) - if check_suspicious_changed_files(changed_files): - print(f"Pull Request {pull_request['number']} has suspicious changes") - if "manual approve" not in labels: - print("Label the PR as needed for manuall approve") - label_manual_approve(pull_request, token) - else: - print(f"Pull Request {pull_request['number']} has no suspicious changes") - approve_run(workflow_description, token) - - -def handler(event, _): - try: - main(event) - - return { - "statusCode": 200, - "headers": {"Content-Type": "application/json"}, - "body": '{"status": "OK"}', - } - except Exception: - print("Received event: ", event) - raise diff --git a/tests/ci/workflow_approve_rerun_lambda/build_and_deploy_archive.sh 
b/tests/ci/workflow_approve_rerun_lambda/build_and_deploy_archive.sh deleted file mode 120000 index 96ba3fa024e..00000000000 --- a/tests/ci/workflow_approve_rerun_lambda/build_and_deploy_archive.sh +++ /dev/null @@ -1 +0,0 @@ -../team_keys_lambda/build_and_deploy_archive.sh \ No newline at end of file diff --git a/tests/ci/workflow_approve_rerun_lambda/lambda_shared b/tests/ci/workflow_approve_rerun_lambda/lambda_shared deleted file mode 120000 index ba86e090f6c..00000000000 --- a/tests/ci/workflow_approve_rerun_lambda/lambda_shared +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/workflow_approve_rerun_lambda/requirements.txt b/tests/ci/workflow_approve_rerun_lambda/requirements.txt deleted file mode 100644 index 4cb3fba0f7b..00000000000 --- a/tests/ci/workflow_approve_rerun_lambda/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package[token] diff --git a/tests/ci/workflow_jobs_lambda/app.py b/tests/ci/workflow_jobs_lambda/app.py deleted file mode 100644 index 4210ca22da9..00000000000 --- a/tests/ci/workflow_jobs_lambda/app.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env python - -""" -Lambda gets the workflow_job events, see -https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#workflow_job - -Then it either posts it as is to the play.clickhouse.com, or anonymizes the sensitive -fields for private repositories -""" - -import json -import logging -from base64 import b64decode -from dataclasses import dataclass -from typing import Any, List, Optional - -from lambda_shared import ClickHouseHelper, InsertException, get_parameter_from_ssm - -logging.getLogger().setLevel(logging.INFO) - - -@dataclass -class WorkflowJob: - id: int - run_id: int - workflow_name: str - head_branch: str - run_url: str - run_attempt: int - node_id: str - head_sha: str - url: str - html_url: str - status: str - conclusion: str - started_at: str - completed_at: str - name: str - steps: int # just number of steps, we don't keep steps - check_run_url: str - labels: List[str] - runner_id: int - runner_name: str - runner_group_id: int - runner_group_name: str - repository: str - - def anonimyze_url(self, url: str) -> str: - return url.replace(self.repository, "ANONYMIZED_REPO") - - def anonimyze(self): - anm = "ANONYMIZED" - self.workflow_name = anm - self.head_branch = anm - self.run_url = self.anonimyze_url(self.run_url) - self.node_id = anm - self.url = self.anonimyze_url(self.url) - self.html_url = self.anonimyze_url(self.html_url) - self.name = anm - self.check_run_url = self.anonimyze_url(self.check_run_url) - self.repository = anm - - def as_dict(self) -> dict: - return self.__dict__ - - -CH_CLIENT = None # type: Optional[ClickHouseHelper] - - -def send_event_workflow_job(workflow_job: WorkflowJob) -> None: - # # SHOW CREATE TABLE default.workflow_jobs - # CREATE TABLE default.workflow_jobs UUID 'c0351924-8ccd-47a6-9db0-e28a9eee2fdf' - # ( - # `id` UInt64, - # `run_id` UInt64, - # `workflow_name` LowCardinality(String), - # `head_branch` LowCardinality(String), - # `run_url` String, - # `run_attempt` UInt16, - # `node_id` String, - # `head_sha` String, - # `url` String, - # `html_url` String, - # `status` Enum8('waiting' = 1, 'queued' = 2, 'in_progress' = 3, 'completed' = 4), - # `conclusion` LowCardinality(String), - # `started_at` DateTime, - # `completed_at` DateTime, - # `name` LowCardinality(String), - # `steps` UInt16, - # `check_run_url` String, - # `labels` 
Array(LowCardinality(String)), - # `runner_id` UInt64, - # `runner_name` String, - # `runner_group_id` UInt64, - # `runner_group_name` LowCardinality(String), - # `repository` LowCardinality(String), - # `updated_at` DateTime DEFAULT now() - # ) - # ENGINE = ReplicatedMergeTree('/clickhouse/tables/c0351924-8ccd-47a6-9db0-e28a9eee2fdf/{shard}', '{replica}') - # PARTITION BY toStartOfMonth(started_at) - # ORDER BY (id, updated_at) - # SETTINGS index_granularity = 8192 - global CH_CLIENT - CH_CLIENT = CH_CLIENT or ClickHouseHelper( - get_parameter_from_ssm("clickhouse-test-stat-url"), - get_parameter_from_ssm("clickhouse-test-stat-login"), - get_parameter_from_ssm("clickhouse-test-stat-password"), - ) - try: - CH_CLIENT.insert_event_into( - "default", "workflow_jobs", workflow_job.as_dict(), False - ) - except InsertException as ex: - logging.exception( - "Got an exception on insert, tryuing to update the client " - "credentials and repeat", - exc_info=ex, - ) - CH_CLIENT = ClickHouseHelper( - get_parameter_from_ssm("clickhouse-test-stat-url"), - get_parameter_from_ssm("clickhouse-test-stat-login"), - get_parameter_from_ssm("clickhouse-test-stat-password"), - ) - CH_CLIENT.insert_event_into( - "default", "workflow_jobs", workflow_job.as_dict(), False - ) - - -def killed_job(wf_job: dict) -> bool: - """a hack to identify the killed runner if "Complete job" is omit""" - if ( - wf_job.get("status", "") != "completed" - or wf_job.get("conclusion", "") != "failure" - ): - # The task either success or in progress - return False - return not any( - step["name"] == "Complete job" and step["conclusion"] is not None - for step in wf_job["steps"] - ) - - -def handler(event: dict, context: Any) -> dict: - if event["isBase64Encoded"]: - event_data = json.loads(b64decode(event["body"])) - else: - event_data = json.loads(event["body"]) - - logging.info("Got the next raw event from the github hook: %s", event_data) - repo = event_data["repository"] - try: - wf_job = event_data["workflow_job"] - except KeyError: - logging.error("The event does not contain valid workflow_jobs data") - logging.error("The event data: %s", event) - logging.error("The context data: %s", context) - - if killed_job(wf_job): - # for killed job we record 0 - steps = 0 - else: - # We record only finished steps - steps = sum(1 for st in wf_job["steps"] if st["conclusion"] is not None) - - workflow_job = WorkflowJob( - wf_job["id"], - wf_job["run_id"], - wf_job["workflow_name"] or "", # nullable - wf_job["head_branch"], - wf_job["run_url"], - wf_job["run_attempt"], - wf_job["node_id"], - wf_job["head_sha"], - wf_job["url"], - wf_job["html_url"], - wf_job["status"], - wf_job["conclusion"] or "", # nullable - wf_job["started_at"], - wf_job["completed_at"] or "1970-01-01T00:00:00", # nullable date - wf_job["name"], - steps, - wf_job["check_run_url"], - wf_job["labels"], - wf_job["runner_id"] or 0, # nullable - wf_job["runner_name"] or "", # nullable - wf_job["runner_group_id"] or 0, # nullable - wf_job["runner_group_name"] or "", # nullable - repo["full_name"], - ) - logging.info( - "Got the next event (private_repo=%s): %s", repo["private"], workflow_job - ) - if repo["private"]: - workflow_job.anonimyze() - - send_event_workflow_job(workflow_job) - - return { - "statusCode": 200, - "headers": {"Content-Type": "application/json"}, - "body": '{"status": "OK"}', - } diff --git a/tests/ci/workflow_jobs_lambda/build_and_deploy_archive.sh b/tests/ci/workflow_jobs_lambda/build_and_deploy_archive.sh deleted file mode 120000 index 
96ba3fa024e..00000000000 --- a/tests/ci/workflow_jobs_lambda/build_and_deploy_archive.sh +++ /dev/null @@ -1 +0,0 @@ -../team_keys_lambda/build_and_deploy_archive.sh \ No newline at end of file diff --git a/tests/ci/workflow_jobs_lambda/lambda_shared b/tests/ci/workflow_jobs_lambda/lambda_shared deleted file mode 120000 index ba86e090f6c..00000000000 --- a/tests/ci/workflow_jobs_lambda/lambda_shared +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package/lambda_shared \ No newline at end of file diff --git a/tests/ci/workflow_jobs_lambda/requirements.txt b/tests/ci/workflow_jobs_lambda/requirements.txt deleted file mode 100644 index 098e04a9798..00000000000 --- a/tests/ci/workflow_jobs_lambda/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -../lambda_shared_package diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 90fb9611151..0c04d8fb2c3 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -821,7 +821,10 @@ class SettingsRandomizer: get_localzone(), ] ), - "prefer_warmed_unmerged_parts_seconds": lambda: random.randint(0, 10), + # This setting affects part names and their content, which can be read from tables in tests. + # We have a lot of tests that rely on part names, so it's very unsafe to enable randomization + # of this setting. + # "prefer_warmed_unmerged_parts_seconds": lambda: random.randint(0, 10), "use_page_cache_for_disks_without_file_cache": lambda: random.random() < 0.7, "page_cache_inject_eviction": lambda: random.random() < 0.5, "merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability": lambda: round( @@ -1525,6 +1528,15 @@ class TestCase: start_time = args.testcase_start_time database = args.testcase_database + if args.client_log: + log_opt = " --client_logs_file=" + args.client_log + " " + client_options += log_opt + os.environ["CLICKHOUSE_CLIENT_OPT"] = ( + os.environ["CLICKHOUSE_CLIENT_OPT"] + if "CLICKHOUSE_CLIENT_OPT" in os.environ + else "" + ) + log_opt + # This is for .sh tests os.environ["CLICKHOUSE_LOG_COMMENT"] = args.testcase_basename @@ -2159,7 +2171,10 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool while True: if all_tests: - case = all_tests.pop(0) + try: + case = all_tests.pop(0) + except IndexError: + break else: break @@ -2178,7 +2193,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool description = "" test_case_name = removesuffix(test_case.name, ".gen", ".sql") + ": " - if is_concurrent or args.run_sequential_tests_in_parallel: + if is_concurrent: description = f"{test_case_name:72}" else: sys.stdout.flush() @@ -2438,35 +2453,6 @@ def extract_key(key: str) -> str: )[1] - -def override_envs(*args_, **kwargs): - global args - args.client += " --port 19000" - args.http_port = 18123 - args.https_port = 18443 - - updated_env = { - "CLICKHOUSE_CONFIG": "/etc/clickhouse-server3/config.xml", - "CLICKHOUSE_CONFIG_DIR": "/etc/clickhouse-server3", - "CLICKHOUSE_CONFIG_GREP": "/etc/clickhouse-server3/preprocessed/config.xml", - "CLICKHOUSE_USER_FILES": "/var/lib/clickhouse3/user_files", - "CLICKHOUSE_SCHEMA_FILES": "/var/lib/clickhouse3/format_schemas", - "CLICKHOUSE_PATH": "/var/lib/clickhouse3", - "CLICKHOUSE_PORT_TCP": "19000", - "CLICKHOUSE_PORT_TCP_SECURE": "19440", - "CLICKHOUSE_PORT_TCP_WITH_PROXY": "19010", - "CLICKHOUSE_PORT_HTTP": "18123", - "CLICKHOUSE_PORT_HTTPS": "18443", - "CLICKHOUSE_PORT_INTERSERVER": "19009", - "CLICKHOUSE_PORT_KEEPER": "19181", - "CLICKHOUSE_PORT_PROMTHEUS_PORT": "19988", - "CLICKHOUSE_PORT_MYSQL": "19004", 
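# The run_tests_array hunk above now treats IndexError from all_tests.pop(0) as "the queue is
# drained", because a later hunk in do_run_tests hands every worker process the same
# multiprocessing.Manager list instead of a precomputed batch. A minimal self-contained sketch of
# that consumption pattern, with illustrative names that are not taken from clickhouse-test:
import multiprocessing


def _drain(shared_tests):
    while True:
        try:
            # a single proxy call, so no two workers can receive the same item
            test = shared_tests.pop(0)
        except IndexError:
            break  # the shared list is empty, this worker is done
        print("would run", test)


if __name__ == "__main__":
    manager = multiprocessing.Manager()
    tests = manager.list(["test_a", "test_b", "test_c"])
    workers = [multiprocessing.Process(target=_drain, args=(tests,)) for _ in range(2)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()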
- "CLICKHOUSE_PORT_POSTGRESQL": "19005", - } - os.environ.update(updated_env) - - run_tests_array(*args_, **kwargs) - - def run_tests_process(*args, **kwargs): return run_tests_array(*args, **kwargs) @@ -2494,36 +2480,16 @@ def do_run_tests(jobs, test_suite: TestSuite): # of failures will be nearly the same for all tests from the group. random.shuffle(test_suite.parallel_tests) - batch_size = max(1, (len(test_suite.parallel_tests) // jobs) + 1) - parallel_tests_array = [] - for job in range(jobs): - range_ = job * batch_size, job * batch_size + batch_size - batch = test_suite.parallel_tests[range_[0] : range_[1]] - parallel_tests_array.append((batch, batch_size, test_suite, True)) + batch_size = len(test_suite.parallel_tests) // jobs + manager = multiprocessing.Manager() + parallel_tests = manager.list() + parallel_tests.extend(test_suite.parallel_tests) processes = [] - - for test_batch in parallel_tests_array: + for _ in range(jobs): process = multiprocessing.Process( - target=run_tests_process, args=(test_batch,) - ) - processes.append(process) - process.start() - - if args.run_sequential_tests_in_parallel: - # Run parallel tests and sequential tests at the same time - # Sequential tests will use different ClickHouse instance - # In this process we can safely override values in `args` and `os.environ` - process = multiprocessing.Process( - target=override_envs, - args=( - ( - test_suite.sequential_tests, - len(test_suite.sequential_tests), - test_suite, - False, - ), - ), + target=run_tests_process, + args=((parallel_tests, batch_size, test_suite, True),), ) processes.append(process) process.start() @@ -2559,15 +2525,15 @@ def do_run_tests(jobs, test_suite: TestSuite): sleep(5) - if not args.run_sequential_tests_in_parallel: - run_tests_array( - ( - test_suite.sequential_tests, - len(test_suite.sequential_tests), - test_suite, - False, - ) + run_tests_array( + ( + test_suite.sequential_tests, + len(test_suite.sequential_tests), + test_suite, + False, ) + ) + return len(test_suite.sequential_tests) + len(test_suite.parallel_tests) else: num_tests = len(test_suite.all_tests) @@ -2994,6 +2960,15 @@ def main(args): else: print(colored("\nNo queries hung.", args, "green", attrs=["bold"])) + if args.client_log: + if os.path.exists(args.client_log): + with open(args.client_log, "rb") as stream: + content = stream.read().decode() + if len(content): + print("Has fatal logs from client:\n") + print(content) + os.remove(args.client_log) + if len(restarted_tests) > 0: print("\nSome tests were restarted:\n") @@ -3386,23 +3361,19 @@ def parse_args(): parser.add_argument( "--replace-replicated-with-shared", action="store_true", - default=os.environ.get("USE_META_IN_KEEPER_FOR_MERGE_TREE", False), + default=os.environ.get("REPLACE_RMT_WITH_SMT", False), help="Replace ReplicatedMergeTree engine with SharedMergeTree", ) parser.add_argument( "--replace-non-replicated-with-shared", action="store_true", - default=False, + default=os.environ.get("REPLACE_MT_WITH_SMT", False), help="Replace ordinary MergeTree engine with SharedMergeTree", ) - parser.add_argument( - "--run-sequential-tests-in-parallel", - action="store_true", - default=False, - help="If `true`, tests with the tag `no-parallel` will run on a " - "separate ClickHouse instance in parallel with other tests. 
" - "This is used in CI to make test jobs run faster.", + "--client-log", + default="./client.fatal.log", + help="Path to file for fatal logs from client", ) return parser.parse_args() diff --git a/tests/config/users.d/insert_keeper_retries.xml b/tests/config/users.d/insert_keeper_retries.xml index f01157d40ac..0cf41473616 100644 --- a/tests/config/users.d/insert_keeper_retries.xml +++ b/tests/config/users.d/insert_keeper_retries.xml @@ -4,7 +4,7 @@ 100 0.01 1 - 1 + 10 diff --git a/tests/ci/lambda_shared_package/__init__.py b/tests/integration/test_accept_invalid_certificate/__init__.py similarity index 100% rename from tests/ci/lambda_shared_package/__init__.py rename to tests/integration/test_accept_invalid_certificate/__init__.py diff --git a/tests/integration/test_accept_invalid_certificate/certs/ca-cert.pem b/tests/integration/test_accept_invalid_certificate/certs/ca-cert.pem new file mode 100644 index 00000000000..c4f88831126 --- /dev/null +++ b/tests/integration/test_accept_invalid_certificate/certs/ca-cert.pem @@ -0,0 +1,32 @@ +-----BEGIN CERTIFICATE----- +MIIFhTCCA22gAwIBAgIUZHeZ4ME7rL1RYGJZGPgOTHl7btMwDQYJKoZIhvcNAQEL +BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwNzAx +MDUxMTI0WhcNMzQwNjI5MDUxMTI0WjBSMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMQsw +CQYDVQQDDAJjYTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALf623IC +1r6Bzwa4dTufRPvFWQinT4uIBv7UvAk8tu8kmLSMOOC2COALNRCSu0s1SFcO6i5G +CgmaWdoTXuw+NYH9n5rU06N991NBfCoLbf0eSoie7bRdTCKkqWWKaw+dCvKL5vWj +LEZRmdh8ZN0lxa05eIsLxErvgWqE8hOOQ7jgRqYLAagp6XkMdB+YRkXFtbVcze83 +ZNxFqNjdgwYYybaqDZKGeOnRDRof7P/cyMkbGNUI7+JYA5eGmDgiOTGbaCZx5AUK +DKmkdmy5IJBWS0rYR8Ol9Sjs6NSYkmYiLFEWFNSvI88K99+Ab3DEi9JMEqMeNTJ5 +1sx48e2OfMG7ywoGj3D3zf7SUxAfwW+7Ls+v7c/kB9hvspA9iVsqjAKcSPAPnJXk +zR5r+zcPwjb/dvGGGB+D7PhvreEpj0n7PRFEB07jviA1E8zkwdk1L9FefYMG2oRl +8J7miWxoyqUOtA6AYhbN36+I0iJkjxIjXLLzB/dhOzerNTzRHcJtmgSyIpcPIV42 +IsJBttLOW0Mpkfw8z88xji+1VWHm+H8sOWCzbfjncScu/z5Xhb92C2T+AG+RpakB +LwyTsovf+xv1gGF/1gmmdKSoV/JZuHSczMmmDLJ9vXRnIpK0AgL16lfs6o0C2Pe2 +OHJv0oViokU5mW+Mge3U9kUrh6BVITk8UjUDAgMBAAGjUzBRMB0GA1UdDgQWBBRA +x0+tgav3Q2gRt34AtKZiGfFH7zAfBgNVHSMEGDAWgBRAx0+tgav3Q2gRt34AtKZi +GfFH7zAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQBcV7s7au6r +bEkHaW9RbGe/7iHesOcH2uyiALGeiCF+pfN9QbfkpJJbMRDhKr+sVqJH6lS+Iys5 +MpPaRHOInNs5ajiPQcIxtxAnw2HEsy6LUVvs6RUHrS7Bxlc406YQCY/sPyItAt8t +U/qhx5VNj6Mx1smyKZHFHZIZ0eYieriJkK6NB2E7Ji6EPGe7lkh3kEoVTfaSVCkP +p3d4U603kzCk8PGkYlhjWUCVMhDRwgEy6FXHmU156HivGpYurKcab7cMUfr50MyK +1Mx/NaHOYHhIIQ87t9wF2yQaMskEcyK9XOorp+HFtnTdci+6+J3u2Kmyckq81uy9 +uZreH8bUKgmntOhSqtl4tWGMWxX78c7rX2cGIbSJsZzzjmUdhvnpYeuhCCrUPzY3 +kIt9Q6q7DCKG+f+D4bHQ292emEfKYb9LgftyOX3pcAJ+d3c56Cs7c0RRVFAZyVnX +1O9ZvHrypmii5RARNuyVtttgDpuGBrRCa2kSNlXD9PJJitI2kfoWrCI/88Pw8H/y +bLO9/ZcCOsxBGmfrQDMUSC8rE8JLUSKV80otWwtHcxcnfpsobs64/Rz6aOLYEiS4 +/0M6xyUYdOsEGUh1uyX5qxto8iOWop4S9DLy1QVx+N8/fC6gjhMb4Vg2oTBnHlGr +JcxdYP8jVe6tUgvRKAyNlccPIq20udfGTw== +-----END CERTIFICATE----- diff --git a/tests/integration/test_accept_invalid_certificate/certs/ca-key.pem b/tests/integration/test_accept_invalid_certificate/certs/ca-key.pem new file mode 100644 index 00000000000..66d548d0cf5 --- /dev/null +++ b/tests/integration/test_accept_invalid_certificate/certs/ca-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQC3+ttyAta+gc8G +uHU7n0T7xVkIp0+LiAb+1LwJPLbvJJi0jDjgtgjgCzUQkrtLNUhXDuouRgoJmlna 
+E17sPjWB/Z+a1NOjffdTQXwqC239HkqInu20XUwipKllimsPnQryi+b1oyxGUZnY +fGTdJcWtOXiLC8RK74FqhPITjkO44EamCwGoKel5DHQfmEZFxbW1XM3vN2TcRajY +3YMGGMm2qg2Shnjp0Q0aH+z/3MjJGxjVCO/iWAOXhpg4Ijkxm2gmceQFCgyppHZs +uSCQVktK2EfDpfUo7OjUmJJmIixRFhTUryPPCvffgG9wxIvSTBKjHjUyedbMePHt +jnzBu8sKBo9w983+0lMQH8Fvuy7Pr+3P5AfYb7KQPYlbKowCnEjwD5yV5M0ea/s3 +D8I2/3bxhhgfg+z4b63hKY9J+z0RRAdO474gNRPM5MHZNS/RXn2DBtqEZfCe5ols +aMqlDrQOgGIWzd+viNIiZI8SI1yy8wf3YTs3qzU80R3CbZoEsiKXDyFeNiLCQbbS +zltDKZH8PM/PMY4vtVVh5vh/LDlgs23453EnLv8+V4W/dgtk/gBvkaWpAS8Mk7KL +3/sb9YBhf9YJpnSkqFfyWbh0nMzJpgyyfb10ZyKStAIC9epX7OqNAtj3tjhyb9KF +YqJFOZlvjIHt1PZFK4egVSE5PFI1AwIDAQABAoICACPKkBVqevddznYqvVfHgMaQ +GmWGEwH8iQvbO+d6IcZ9Mp3xJ4GDuE9Jrt2WBgy4BWY2VxWAd6RtnCxCF3XTFlq/ +CeU3oSKNt2dBdpPBXHjhAzP34xtzCQQ2T2QbHP/RF9eeouRHha36YvkMHh0to3qz +EwHN3LaNSHp9+ZAXXZJiQyw7OOvLl4l+hDq/R3B8aYCqWL/AYYU+of3qffpaKSe5 +hmFqeqxWCHv8GQGfpIoHFPogvnYklDJOs188PS8q4gSUpHzceyEnhwhYrX6W4xfg +4nkr2HlJOXReaCCpZkCyEIRdvBQjuJwI4uUnv2G+feOw1T2IGWL25m3GCfwweeuZ +AWqqbj5Xnl1+VgvaNm68aHBVlfZ14rIJ/Hu1yJV/E+5m8+3xewAor0jshWceC3OS +lhrZp+EyrwxsKj17s2IBhk4EIOLjfosoDqdw3E2DrtqKrnI63MLUTaEqHWyXnF82 +Mn880DR/E3Byj2kKtdIVesAdO0ARtIRqedPunpcvhp1eMZ2wrLJXAHMDBJlUZF64 +ZAyO3ll79PhBn/QShGu+KQ682Ns1T3j3J14n2zqBKsU1GzSLBOZO8QXfFELsv6Xj +NGYPg2SbkRwbYHEPr6G9ehsxKaTpMAtPesdz1devKDI4nSpd2qE9ZfREH+CW0MkK +bpiL20d4blIG1DP84qodAoIBAQDEBXbn0IEeyL3j50/AUJY/K4seeDm1ltJmhoNU +HC0dwt2FOHrD1J/Cx6S19zuEVt1qMwJXELxOVzixykKPy73ETW+Qfcpo2J51THrl +fSEUpoheD37Cwjv9vzb6Qb4ZXDr4acLIB06mAUKfo0Pez2x6IybWqS/dZoD1bfh5 +wiSCITXQB4n6mI7chfDDyTqgf3C5U5b2UABLDdIKQZHY5kX55Vfr0Lz+Gt2t6uUy +hUiN4C+Qrx1LoHY4x7g1SG9lKASG5zv5dksc+a6uYsS4N8TM3ZQz7BnZrCQPNiWz +K70p5pMjdVTHmQqThoww9y+rRiHMU3qUSZ0tX4nrY99/MiFNAoIBAQDwRi0GzkeA +ECWfr2R+vh5lnxXBeAvM46v+8CzwJHNxtGAhB1ojod7GxOxHiMnWDXubRiNVlhx0 +OHeCq11hSQh2+a7s8xwxYbC3paYf1ZQq81YmTGd8v+/QX9b21Kq9dpx+G2C3eikZ +rHMuSifrSEwZMxdw/gKvQ+x66PTNeK8fIgAtZsE96VX3u7sCCNhbBElWWg5OwwMf +0RH5Pi/O8UXxkZs2SUKFaXUFMxjYn9r9636QI8LaLsCvcbPjrhshPtHWHqgrZzia +8gD00ty3mcydRU//xNfHruew2iRZUVVB8E0GQ80eQCsHrI0YkP876at55DKbyZLi +pGisswDdyoePAoIBAQCfOFUSDMGksKfVY8Da1FqGKydQc5Vkk4KJdr9b5Zsb7nmi +yXKmeKHgQ+vdRlmYsDofFhnTgKuD8sMBXuncBUE6Hy/8rMBpk3J6ksnMeGwTawWf +aXSsW7sA7H2KnzX7QXhA6R8m/RB8NM/Z/XKWm+Nf/geBG2Hes3ESaeYHeJPRA4rD +VCxWWe7I3x7m2R/+2CVG08fugKMMU8iFPidqh/jU994HlMC+ojHoH8FV3P9HBo98 +NVFVaZw9mLRey1x4TR4IOfD0O4rdmU2zJlTl0Z7QTTchSajhrzqHBJNwyplXXIut +pJNdoUj+9a4P4+hbdZbkcfZCVt0NyumrQq96kMyNAoIBAQC8Jcemd6tHnd1IYPip +wshcvSk1GYLsCzAo7vvRhEmeNSe9VVuBYYToU1XOYk/ivSYYneCXaXeYjosPGI+D +2cO2HqiaxXhDNwNBm4YtkXp385p3XQcAifAJVLMpD7kvqYy7t0aSBiffAcoj6k++ +Dxv/86EcituhUOgCCx9/Mky3bVGEO92AVJJL7oZu/mwlQYMICnb/t/dfV8DiUo7v +VsF1e5Z1+QINJFpNGwkOwIakpV6zEj3Hyip6d+fslu6Jpv570//OFQMfWffjvqa1 +Ysr2MEuHtiVeKRkmxHsIMgc4HirCiB872/WWCclm/kvPJ5huV4JFuZ+6uBF7gqRg +CPMDAoIBAGs0KCL3a/eXIPWhFcOgtpcNTb3o4gK/s38+GLIVbdP7MUNEkMeWq+Pf +PBUca2p1M8CkbpRi0OAvg1gewAR/+bCdyzoipl1+3v3goYC3J5TAs5NT/siBcjrm +QcTKEHmTeVu82Kpi1JVtcfS2TWuSmT6vSB2A2H3R4GEPQQXnXeS/FnRA0I85MFzq +zxMkar5VY7Eg0Z3/bN5dNqAYSzqjgMMhRMPN1W54gjL2ev65Mzt1jYf6xz7gpJOh +6Fw2EOrM23c4OckZx2dZ7Je6ns2U82D81ffhXRwWfQxoKXvMVMccBJmHTbkVEZlE +VAq9BWwNistc+YfRka1OOeq37MgpXq8= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_accept_invalid_certificate/certs/client-cert.pem b/tests/integration/test_accept_invalid_certificate/certs/client-cert.pem new file mode 100644 index 00000000000..2dd497409c3 --- /dev/null +++ b/tests/integration/test_accept_invalid_certificate/certs/client-cert.pem @@ -0,0 +1,30 @@ +-----BEGIN CERTIFICATE----- 
+MIIFLzCCAxcCFA6o2DM1UGQ1NNCQzp0n4hb3iSd9MA0GCSqGSIb3DQEBCwUAMFIx +CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTI0MDcwMTA1MTEy +NloXDTM0MDYyOTA1MTEyNlowVjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UE +AwwGY2xpZW50MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAl9i6heiD +2inNs0lKt7XjK0QyXKyIg63uVO39ZpLY5sj1noZnbFmRo5MYC4MMIrlD3wGnqUo/ +mdev2eg8qYry0CmzhLscVEha7LN+7wmdftYSGkXOl9E08KIv9Uw2bHap4KEIRvJ6 +298bdsuv1l4ljWo3VDkPCczXIaUi94/o1uNu0sbPRluBP8kjFLhwV8Ph/7mxsofu +V5tDYNOZ8VozA7aHAnJqXBCFZuW6igIH3XaROY9LOe2FD+qxtpqJHp7E+ZNsXXH5 +SXMQzE2/X7eKHA7zvdG4z5GLgmz3GEvq3ZBJUGUe6DCnrxqBXUeLTqyummohGE57 +LuqzrH9etFj/91NaZW9uwN42pxGJSkeZaJ5BDnzO3VYxqvvDnFs/2TFbJiZ065vI +NpA0PnuAkGjztaNAm6qXNTXsWo8bdi8TDgIwKxoo5KIlWsxnxKB82tfBovWd+JLa +xLlWNKbKgNb6xc9OmNCyhYkRXhqIsBrpLy6n2yppyYEiCBzPrqwJ2p3Y88yj3dsc +xiiu0ngFSroE2pLwCgi1rQY1x9VO4XQrrq6vBvPhi530+cdXdVTnnyBhaOMX4bWX +OOjI97xJmR/YbOvpgKw9v/na82t2fgKoqIs124iZ21QOiffOIfwwg2AKj8EaWJ9/ +Era8sZMsq/g56PWYwT1J85cCMvbNwGOXjusCAwEAATANBgkqhkiG9w0BAQsFAAOC +AgEAAPSps9k8HEZrfBbzH5SHASNTUc7F81h2EYhaEuQrXoan39u3syCKYMSJDt0P +AVCYbIvXmlHeaBba9YXtZVZJpCM53b+x01297x1bXzRklYxnpuD2IauuQgiaJBXV +VlCjEy7GHBAmRQwV7llkoxx+qHsA0b+oHKvTRc9FJzk/Fx4Eb496gNMvUk47f+15 +WBmWoQolqwbH1Ivhca7BpTvTSeNGkJ14cbmuhwryLstHJCPep+vp2TYS9bHFZIPY +g3APnWv1qShuA7Oj68GLpRVJmImgyPMK0i8LDGcLhrDiG+HYEr89MiRWDvBHun7O +Sp5y8zDnDZSrN+EtRp8DypHeB7R0Ukmn76NP6NsigWCPMlLaHNVsvVAwWwFNae9u +OxsoW07tJo3lZjDx+tFrUb7IRyn/9PLDIwEs4SvDP+O97o0JkCszwfRhkDnjY7Qj +XCgXoCBalIadgnGshbEjFLH0ZSwAoCIC/35X0Zo7cd1+Fz4Xq1tgnV5ec6ci/EHi +qsOmGfTqZz5HJlmfyxUOYicizV6SjNZSFWY07ebYRewx6ymdDFhqrbnJIEUGJYlU +5qDY1nemoqh9QRIhm1+73hAKHtmcaa1u72gj1QBtRD5eG3ToM3GHgdbUUPvFpenx +UPfzw/wbzITSqPoYr1/XIGq9e1muTqHbFNlGVByIW0ejeZQ= +-----END CERTIFICATE----- diff --git a/tests/integration/test_accept_invalid_certificate/certs/client-key.pem b/tests/integration/test_accept_invalid_certificate/certs/client-key.pem new file mode 100644 index 00000000000..09c04c1e815 --- /dev/null +++ b/tests/integration/test_accept_invalid_certificate/certs/client-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQCX2LqF6IPaKc2z +SUq3teMrRDJcrIiDre5U7f1mktjmyPWehmdsWZGjkxgLgwwiuUPfAaepSj+Z16/Z +6DypivLQKbOEuxxUSFrss37vCZ1+1hIaRc6X0TTwoi/1TDZsdqngoQhG8nrb3xt2 +y6/WXiWNajdUOQ8JzNchpSL3j+jW427Sxs9GW4E/ySMUuHBXw+H/ubGyh+5Xm0Ng +05nxWjMDtocCcmpcEIVm5bqKAgfddpE5j0s57YUP6rG2mokensT5k2xdcflJcxDM +Tb9ft4ocDvO90bjPkYuCbPcYS+rdkElQZR7oMKevGoFdR4tOrK6aaiEYTnsu6rOs +f160WP/3U1plb27A3janEYlKR5lonkEOfM7dVjGq+8OcWz/ZMVsmJnTrm8g2kDQ+ +e4CQaPO1o0Cbqpc1Nexajxt2LxMOAjArGijkoiVazGfEoHza18Gi9Z34ktrEuVY0 +psqA1vrFz06Y0LKFiRFeGoiwGukvLqfbKmnJgSIIHM+urAnandjzzKPd2xzGKK7S +eAVKugTakvAKCLWtBjXH1U7hdCuurq8G8+GLnfT5x1d1VOefIGFo4xfhtZc46Mj3 +vEmZH9hs6+mArD2/+drza3Z+AqioizXbiJnbVA6J984h/DCDYAqPwRpYn38Stryx +kyyr+Dno9ZjBPUnzlwIy9s3AY5eO6wIDAQABAoICAAl8R6ICPpCcEXrUb5qn2Ujy +IJQdT+D0yJqGTD6QCDvsLDstGcWMcx1him5785IxJ9YpZBbnidO0euXJo8R3Ql8S +aGHK4ZtfFyeKAYNoJ0mSIx03LdiRAEOJ7QDf2LI017yJS9IkeW5ql0fTas0za2Jm +nCBZZrNNzvv7a/BEn8NHkfMQV5Fgy9ESvP3qcHhsNHly/aeD5ZEmbZcmWeb82orS +nxEnR0hUSnI0JpibsycT90DimAOh3LDXfd3E0Iz9VsA8urMpqtpDYzZ1tShed7TP +tyiaxS1+PUNIkEc9xRv2i2FQyhVmpmZdoCXfNi84nUtWhJYE6HkrmgRwnnLQp5rr +4stSQLo9X/5ML/bo2cj1eAiH+P6bAWaZhkMYPGkhBYwwgYqZF0Jn2Ay07BPF5gqq +rDY0mjF+Nm/NOqHfQjWHSj7KYYHm6qU1s5cAnDWLfznXGWZHpoI4UqAZtpAwGWEh +f/nS2LyepFW8RE6mbRDe+PkOM2455QdM8MuQc8drGInJ57A5e7y1LxOvFaUP+QtG 
+fqXok95XciMfttcR8nfYkUyC5QX+JmDZRf1U/DUxTJtbKXRKVG8WaFagkViiCniw +Q4RbrS4Utu6h+8HXdI9mK2pIHlJPp202b9gPry1j3kya8/jx5cTFWCcPj4IlgY51 +ZC1+vXJUezwq60gWJrRtAoIBAQDLsR9M+ZdBC5sl5fiVGsznAvfEVv/Cf7pmeXqt +q4BZT7kB8zfdfRRMzmZiQIX0zTbBn7Q7kWULcl8HgPbMEGZorc24CFuCq8k0n2ji +0aJsTJlfr7kUfTZ5c3HN1fGhDrnGoCL1eElt/Es4hZTIDnbn3i+RoCqwhCqhC0Lo +L7f6iHrVIcKyoaz+Cye9QKA89PVHMNM0f41zFJTVpSzGfWefG4vNoeU6nxqwkWVJ +Guhkt1EsJUScHueAa7hUYlELDy99tlqOPLdG7/GG6LmE6pM7R1E6DishnL1VgOll +KL6fouoz8nTve59eCDQECNzXN8lxPvx6TdufabGJTSZ3iNjPAoIBAQC+10DFlO3Y +XLHfX9LWK4rdkghx9AoeCFIVZO2G29H4zyDQn7hAMISWeVCqdAKhPQMJmAXyhQcc +PLEUdKVwifbPNO/IxsWL8SOfbI28XoZLYkQLual6JPa10jemFsinVRTPLU7RA1/T +7t/mWvyaAHmODWUPKSs1ltzh5dNwvnSNou6h72TytwlMIk5h8Z8ZtMIY2v5Ca2Mx +I7Wi8/5RebpszBbuLYy0GBqK5oqQov1YH0WK3ae4leaPWlZ49nyNYpx5Bvngm+d8 +4F+HvcxdpckRPfenl9nBW+0UHVXZORepQ+g0CNJxAQTomXy5VeEnrpiylrec4mE7 +YD15GKDxeHclAoIBAQC6efcr4zwDwnpRIc6HwrdfriniR7tx9rlRY1lacS3ZldDL +5jx4uy/AeRzmSifYy56beHfqKNuWtSgmmHxYqZL3tlYlaxFL+bUERgQh0sWd3Cp1 +nUwVY+RhH71KMdk6F18gZi5bB7pBxL2gizqARC27bH+kFDvAyd60/LhOFJGb/SJm +yLeUDctv8Ap7ihSoeaGrgRl6cGOwvWDcjpEFTn10/iTpJpPTLSGkEBfn64hh3Cml +/qBnRSuvN+7S4gdL1qUls7IHkbJAcR5u4dQlyRLtLAdC3QHDjdkP1CrX/4s0XlOH +JyrCrCxkS6JDJ1H2fg4fbQh7d5few0ZTapwSxuelAoIBAQCloou8MTuR6zoShsem +TRp0npen18ikMlKT8QpbXxTJ33Q6vjWbuMCLHXYOyyuxKBQn0oNu71ixgFetSmSA +3a2SP/7SAvtYL43cg8ossNncOciXaKgYtFZQjsjv5Zx351dtTYS30jbs6cn41AWj +4D+gNC3/T5sKIwtJIUE3/IWKnNyDPhUeuBBgvPHM/no5RHKmQgQmP7BS6QxwB1c2 +/fD5FtXcGTqi4LzAy8F+MYizYEjqcuFsSuZWi3C8AisuR2VJge2Eravn7ZHcbbYf +nZF63o7BLgG4WgHV0HQ/WdcYeTDjYLjNjbXl77mZ3ljSKS6s6jUo/C6p6yjPnrmp +ewXlAoIBAHX9aLtoj06zU5dtrMVjhYip6Z5LEtHsXCvOmnAl91uUXKwtMze6Y7xc +4ZggVWBR9lFyq9bAAuIXkRuE1jbvEwl5sLZY5Hf6GrIUR2uD7ZYFbogBTgmWCX9S +vYRpofEwquX0AUt+a4nO09wPBX1BJMViAEZSAWGfCfzVz8rXYIgcmu471wrpQHv4 +/whNA90JX0v6tuOASgR2dGorU//0ejGcm0bcYSuivKPxBfmVNeZNgIJXLM0ZHql1 +qwLnWXS0e0kADWhp/GAWVo0xzefgD3mDmyG2Qbz8MLaDkwPU/L073ZZNT1AHFZZE +xfVijyydtPqa/JDgqcUFBxLtiKGTbbI= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_accept_invalid_certificate/certs/client-req.pem b/tests/integration/test_accept_invalid_certificate/certs/client-req.pem new file mode 100644 index 00000000000..507db7bae6c --- /dev/null +++ b/tests/integration/test_accept_invalid_certificate/certs/client-req.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIEmzCCAoMCAQAwVjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx +ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGY2xp +ZW50MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAl9i6heiD2inNs0lK +t7XjK0QyXKyIg63uVO39ZpLY5sj1noZnbFmRo5MYC4MMIrlD3wGnqUo/mdev2eg8 +qYry0CmzhLscVEha7LN+7wmdftYSGkXOl9E08KIv9Uw2bHap4KEIRvJ6298bdsuv +1l4ljWo3VDkPCczXIaUi94/o1uNu0sbPRluBP8kjFLhwV8Ph/7mxsofuV5tDYNOZ +8VozA7aHAnJqXBCFZuW6igIH3XaROY9LOe2FD+qxtpqJHp7E+ZNsXXH5SXMQzE2/ +X7eKHA7zvdG4z5GLgmz3GEvq3ZBJUGUe6DCnrxqBXUeLTqyummohGE57LuqzrH9e +tFj/91NaZW9uwN42pxGJSkeZaJ5BDnzO3VYxqvvDnFs/2TFbJiZ065vINpA0PnuA +kGjztaNAm6qXNTXsWo8bdi8TDgIwKxoo5KIlWsxnxKB82tfBovWd+JLaxLlWNKbK +gNb6xc9OmNCyhYkRXhqIsBrpLy6n2yppyYEiCBzPrqwJ2p3Y88yj3dscxiiu0ngF +SroE2pLwCgi1rQY1x9VO4XQrrq6vBvPhi530+cdXdVTnnyBhaOMX4bWXOOjI97xJ +mR/YbOvpgKw9v/na82t2fgKoqIs124iZ21QOiffOIfwwg2AKj8EaWJ9/Era8sZMs +q/g56PWYwT1J85cCMvbNwGOXjusCAwEAAaAAMA0GCSqGSIb3DQEBCwUAA4ICAQBr +ZWJGghyeCUESKu9v9dgQYD4Bq5YviYY7BAjdQsn5/Ti+tqkAKHEn/L+264QJMrTU +muTKg09o7n7r+g4sHmZhT2QEtwGFwaEGvz2zRd4po29WlGQAN5a8yzXIb8b9n+C4 +Z5APeAzjBV9ooqbJCURlgeDP247XTcU8yIvvX0YpLmXYRQ5Na8TTfdBdlD3Y23mD +aPe+2YGkK1Go4iNMoyTEoJ0cj+89WMA9o+BiGcptALYMzKBM26f/vz9hb0J49RDV 
+MkccL8TKNDJQrLFCIcwpLRw08uZZdqJ/cy01hrtkMkp+SSBQO9qSmalDrTLgs4Pp +3ml88OIsQHMkdBUjUAw5FyAJoik4iXab/sLoMmMJ/lqH0in3dT4KeEh8Xe2iAfNU +97/6lAuZOrn2EPvqBJI/gfjwPj9wTgjlvLpmcjhBvfqkuM/dd7zTmwRoM1klRMC2 +07x2YNoG7cGXjAYk1h5HDYVJqdphgQLTS1nRqmiQ8QnPryqlZcI7T/GgpOOulHce +lgMp6FFlGaQMysCbdYJQk4JMQXjwlj5ms/6rZSkgwmrv6WhCY7ImDj2E/7oE5XJd +BqXHFwOQdCSgQOVH+QgmxceYolI73cMNF3Frq8/IztAOtMyBF0bpRpI1U6a3ALf4 +cK2Dkd/TLg8ZwsVBjPaRrtXlntO6tB+tn3suukCKcg== +-----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_accept_invalid_certificate/certs/generate_certs.sh b/tests/integration/test_accept_invalid_certificate/certs/generate_certs.sh new file mode 100755 index 00000000000..610a1281f8b --- /dev/null +++ b/tests/integration/test_accept_invalid_certificate/certs/generate_certs.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# 1. Generate CA's private key and self-signed certificate +openssl req -newkey rsa:4096 -x509 -days 3650 -nodes -batch -keyout ca-key.pem -out ca-cert.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=ca" + +# 2. Generate self-signed certificate and private key for using as wrong server certificate (because it's not signed by CA) +openssl req -newkey rsa:4096 -x509 -days 3650 -nodes -batch -keyout self-key.pem -out self-cert.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=server" + +# 3. Generate client's private key and certificate signing request (CSR) +openssl req -newkey rsa:4096 -nodes -batch -keyout client-key.pem -out client-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client" + +# 4. Use CA's private key to sign client's CSR and get back the signed certificate +openssl x509 -req -days 3650 -in client-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client-cert.pem diff --git a/tests/integration/test_accept_invalid_certificate/certs/self-cert.pem b/tests/integration/test_accept_invalid_certificate/certs/self-cert.pem new file mode 100644 index 00000000000..7b3832a3b6b --- /dev/null +++ b/tests/integration/test_accept_invalid_certificate/certs/self-cert.pem @@ -0,0 +1,32 @@ +-----BEGIN CERTIFICATE----- +MIIFjTCCA3WgAwIBAgIUFpPVJytX13EWXwaLqg/rOTdu4UgwDQYJKoZIhvcNAQEL +BQAwVjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGc2VydmVyMB4XDTI0 +MDcwMTA1MTEyNVoXDTM0MDYyOTA1MTEyNVowVjELMAkGA1UEBhMCUlUxEzARBgNV +BAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0 +ZDEPMA0GA1UEAwwGc2VydmVyMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKC +AgEAsQO7f1Ar0ThZjh00KeuckyJDf/oPQ+67EX2Y82W4/dl8KmHksmIKp2+CJFwg +Y7/c1ti+DjxVXr4tZynzUeDYqujTrAidUMz0zrvjXNeeyoe2hgc2X6YFtVhyEtMQ +6q8VMaqomjuQDIyxUUdt2xHZWyxHPntDrzsmCCDH5Md27CZ4Z5jsSOdgnlrCkjd9 +NtuIzxTlrExe3whIlTL0DURTvAVV/tPoVpVSvHTWCdziaCWm1hG5T6ZCN2W0pBgV +k+UNNtXr1ksr/Ma0Z84ShKjpN+soFOIZ3GDDGY8Xr/74jZo2c4025TpeIPcPBE0Z +b2K24l0UNnQbgm3ryI7NRrZ7psNnhvnWZru4w5Ykr8RKBf3n1AZOt8+dXHNvJQeU +/CUA6pKpc+sIxzAtWtS2ABqdOTJTXj2GyYNsz38dI2uzecUzT7oEtHWq9sr376EO +xMv1wXsUBli8INhPricfojH2/ACnRPysFglAKTx0IpcwqYoV5pGkk0iK1DNkQMv4 +Kf7cD2hLWv9+Fgbrjfu/gNCL0bSJBjaTHngACePsLEQkmGomscDySYXa+PHeB8Ln +QCWsE6MkeDa5g3gUOPV/vC6qv0ZBvQj1p3sOeyIQATDBYX+KheCO+N0ZCCdlT5Sx +thKNDusultu6/lyjGum0c/5oGgoDKTIkibgljekKupBRAykCAwEAAaNTMFEwHQYD +VR0OBBYEFEnPwBjLDPzta3nBrUEH+Gz1nBJRMB8GA1UdIwQYMBaAFEnPwBjLDPzt +a3nBrUEH+Gz1nBJRMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggIB +AIW829ODEBvBp+vUwt3hbsJdBx9DzwcBBJ+Mew95YVveiosU+3p1HaWCkc4srgOi +hDNbpvfbqBIKVAZzesJbuTEeQ+Aj4M1lcFed9tvidVnmqDgHJApOI61GihT9MDjC 
+dH6vhcsorE8E5F58FcMJ0QDynVOuCA/gHkS3ucMxBsNKGfYNe0rOGMw23CzHdnz7 +UonOKhrhIQawI3ELLQxo2LWZEmMsluznE4MB+RGdn3k/UzOY9x4bauv4hrHjYvEm +x/zp2iWhZXvvTZ9u9xaHXEzbSU3WFCcXE/l9SDPM7ip9fabUrvaTJkmM190bD9sk +VcEFOAFBVxfx2fvmBIYqvSkVbcWdAT6x8k+MVNfq99/1aIdh+DTB3XUB7FkE/fnU +tXr/1sz0guwfNI/cHvhHisjmbgMhZWeeVV1YZHOJCsK7eq9rGq5Uul8nYrTdYKlN +u0OcrBXYBB6ny7ljCTahyPPmFZh/4Mgr0xROn2HyhTCpDHjnD1M5jT783fvWqu06 +cpOjRIPYNtA/UQAWUZWrTuJTD6Kju9tDI8i1NtMWIXd7w0LDXsXeHpRoDrVvOIPT +P0ivUpt4dFHnwR8Dos4NI/j9mkdGCblUxDsd/7or8gjedOoWjWfGCNlYa72aq0gf +KjKGPISC3BGTNtwRTe+E7k38r+Y6YO/Tnu9RvBdDM3vF +-----END CERTIFICATE----- diff --git a/tests/integration/test_accept_invalid_certificate/certs/self-key.pem b/tests/integration/test_accept_invalid_certificate/certs/self-key.pem new file mode 100644 index 00000000000..10b299a8c0c --- /dev/null +++ b/tests/integration/test_accept_invalid_certificate/certs/self-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQCxA7t/UCvROFmO +HTQp65yTIkN/+g9D7rsRfZjzZbj92XwqYeSyYgqnb4IkXCBjv9zW2L4OPFVevi1n +KfNR4Niq6NOsCJ1QzPTOu+Nc157Kh7aGBzZfpgW1WHIS0xDqrxUxqqiaO5AMjLFR +R23bEdlbLEc+e0OvOyYIIMfkx3bsJnhnmOxI52CeWsKSN30224jPFOWsTF7fCEiV +MvQNRFO8BVX+0+hWlVK8dNYJ3OJoJabWEblPpkI3ZbSkGBWT5Q021evWSyv8xrRn +zhKEqOk36ygU4hncYMMZjxev/viNmjZzjTblOl4g9w8ETRlvYrbiXRQ2dBuCbevI +js1Gtnumw2eG+dZmu7jDliSvxEoF/efUBk63z51cc28lB5T8JQDqkqlz6wjHMC1a +1LYAGp05MlNePYbJg2zPfx0ja7N5xTNPugS0dar2yvfvoQ7Ey/XBexQGWLwg2E+u +Jx+iMfb8AKdE/KwWCUApPHQilzCpihXmkaSTSIrUM2RAy/gp/twPaEta/34WBuuN ++7+A0IvRtIkGNpMeeAAJ4+wsRCSYaiaxwPJJhdr48d4HwudAJawToyR4NrmDeBQ4 +9X+8Lqq/RkG9CPWnew57IhABMMFhf4qF4I743RkIJ2VPlLG2Eo0O6y6W27r+XKMa +6bRz/mgaCgMpMiSJuCWN6Qq6kFEDKQIDAQABAoICAA4BaZhifipeTFeo395hPKY3 +k8z7EAd5SrvQ4C/XNefVNAnLwmM1dqUAzEnsMB6JqQN8FVgzAIjagI/PtedzOYbB +nTMuC08VUwP4bGjay+mNC1gCF2nsojmoTH5IzOcvVdH0MBrj6OXVS+z5EIFrdsh2 +nGcegaB1DEagXAXcnQAvmwniA5qmaUfzjmmF58EAuwuyGj9OW5icoyw1jTHE3OCH +hjeqCeav3I1PVpAOi1YKL0UeFIzExCsglPEBfCjoz+mCy4IiYydA4Y1FChoM80bw +CDUpc+UH6D02y9JVA317XMuPuhmGS2RNp+vYiHv+Uide40vbq+w0nkSOvUHXC7lo +UXf26sZWHcQP8TxewjZFcajApk03lCAb/u4skt7OQxbDOcvrcdbCsSpZPS16s3CA +OWA9fS7WJDmFx/dhzm4xNaXpz9z5MMkUfnFqgceG3H1leamL66J+eOzlquQ8LdsJ +/wTvCi1SsR3JmqT+TpqlXTJci7A+I/b0Yt6a0s2HunQ5LA1Xomffhn/00IxUeqyi +4MKh7wnoCaRlwZVORgQfKnosSkbkixDabwr9BMnMryE+NdQI3JKCvchdB0idyk1z +igTgZYpaxZg9hwTlOITGYSsprN7tiwjNQG+Kivatn1NxnxhD8lpDoUqluR7mZs1e +CwOLrB+irCULr7tcRYtxAoIBAQDFV7E1Oz8j8NkvkLztIFJ0ej7zEyNp3HUJb4bd +SRvF2AL6sqSw0xVLK8HbrlfPo1Nex8nZUfY3v2DmPS4+S3mrtk+8XoRkvNn0VdL4 +FqLfygYsURI8xVp4zTsAmR+7grPJbb/u3XG7CBY3T4XGHSbMmYAu7+W3A2t8Q+0U +fptju7Gc9gEACcSfap45CzEnFbabxNq3FJJ9V92N/xVQcOzNLxPxgn6tVhPeQnXV +0WHJQdxB/8DivgOObiTCB7on9tNbSHisTYtGwn6Kz0961ewYU7Xj4epj7A35nWhI +E10FDAf3T7cQa9ubSFZwOiSe0voAbiWbN3/M+zgkZDp2+wlnAoIBAQDloTtzgQaA +//VPNhB2d7iOgAl5cUSWqY0HLJobw1ETsdbkGRTgZrFXe4XlXwHB4S+zVEMmyR3p +HY0sYoAdpmd3Ggo4efJEx9IxkD6UZ71we02szvt4MdV5mwjHOAQfEJffktVkeqEJ +gdekSmaHTeryBf3ESpefq+OH1qaj/IjivySLI4c8fPcxNg0nMmDsdMzRaDOHDDa2 +nUpRBeMFkwiiPJpALPPp/4be0d9mrahPd4zBoBieZwaP6Zhwksi0NaM4WKEfx4Fd +TzH0K7UfLDMJ13wuTsNPzjPlDA+R4E/O3qVZxtHzuqc7fCksE7inYZghIRSjQHmv +mmLPaFdaIWTvAoIBAQC50BQfVyXgpYsaamI5tFFvMxvKrJ6PsvDQbZ7DBW+NHhPQ +Eh1YEajfGthyDPrJm7OIbv30DmSToTOQFtNP/KkXsZXqe+SVJezjT1xSFJ5PsV2W +pM9TQVAqprODoKpsWGkIpLowI3pO7RJeChafsBs0UuSLTX8yj5nJa/vzbDLL+ZMH +g+r1Kj1lDXLlq1AgySUkDnsSH+C+/Ni0NxbOo0WU/Vr98C1u8LnMcS+aoIg+hDDs +Q/BOjzOyDKu3tqblslbYgKh/NJSLj96uvPzNYFfpoAiIBdQ5Oa7+9eOW2gSgF+/4 +QarRX31r/PTGvzh0A5s6hHwMZnmDfJgkS+tw1HbJAoIBAQDfauQBGYROdrsieqPB 
+3/H9E64kLoRhYk4IsOiwInT8CIfgHlcljkT1rQU1ih6XmJVkkZNUZVFowovcp2mn +b4QzaW6QTHXfXduwxXtdwSGcExwcCra41yOw0ZrU2jeIUgiySpa80GNmcKJzAsCG +3NS+7f1dwoWLoR4byEkSPF6i3v6bMdIelCeeBcz+WhHbfjjZddzMhKGnQ5pPAr9D +aS95SiV6U0JuZWO/38rQiB2+U77rlOzfEbZcxxa+UNJeI+HOrMz3spC3TJp5yxzK +W6K87J+yHogBWcs8C2QKtohv6khO9RbJYM3dHoygif+bO2Vctmbx/4VlyXxh+E8l +LmCZAoIBAFR+dd7cW0pDkOsPsGZbWwDPNRwo2lxmqf2B5w9zoGNFoc4qV5k9tJfu +kPcwol6h2P4s2olYTgYC6hjPmE17U1TyP2DFeOw5IdxzNqy7dZndmaTs4qFLnDv3 +HIbkX42p7v++EU9eKAhYKJzJpuianMZ+v5azV1wNS5TDuN2TFULyIi+1UEw0ONuJ +rA97idb2PrO2sT/eBj02uiOcPhZH6CHSMxDp7QG+S1AVZBgutTQQdeAsb6pE/iW0 +u3IzGNxTYjTmF+u7iNdOPiaTFpdnltDviADESMJGH7PIk7r7PXMI9ID+LbBFGJ7Q +duW6PGuZ9JMBbxbNNvYayVgPeAeUsUI= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_accept_invalid_certificate/configs/ssl_config.xml b/tests/integration/test_accept_invalid_certificate/configs/ssl_config.xml new file mode 100644 index 00000000000..a1dd1a24693 --- /dev/null +++ b/tests/integration/test_accept_invalid_certificate/configs/ssl_config.xml @@ -0,0 +1,17 @@ + + + 9440 + + + + + /etc/clickhouse-server/config.d/self-cert.pem + /etc/clickhouse-server/config.d/self-key.pem + /etc/clickhouse-server/config.d/ca-cert.pem + none + + + diff --git a/tests/integration/test_accept_invalid_certificate/test.py b/tests/integration/test_accept_invalid_certificate/test.py new file mode 100644 index 00000000000..87229d75f90 --- /dev/null +++ b/tests/integration/test_accept_invalid_certificate/test.py @@ -0,0 +1,92 @@ +import pytest +from helpers.client import Client +from helpers.cluster import ClickHouseCluster +import os.path +from os import remove + + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +MAX_RETRY = 5 + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "node", + main_configs=[ + "configs/ssl_config.xml", + "certs/self-key.pem", + "certs/self-cert.pem", + "certs/ca-cert.pem", + ], +) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +config_default = """ +""" + +config_accept = """ + 1 +""" + +config_connection_accept = """ + + + {ip_address} + 1 + + +""" + + +def execute_query_native(node, query, config): + config_path = f"{SCRIPT_DIR}/configs/client.xml" + + file = open(config_path, "w") + file.write(config) + file.close() + + client = Client( + node.ip_address, + 9440, + command=cluster.client_bin_path, + secure=True, + config=config_path, + ) + + try: + result = client.query(query) + remove(config_path) + return result + except: + remove(config_path) + raise + + +def test_default(): + with pytest.raises(Exception) as err: + execute_query_native(instance, "SELECT 1", config_default) + assert "certificate verify failed" in str(err.value) + + +def test_accept(): + assert execute_query_native(instance, "SELECT 1", config_accept) == "1\n" + + +def test_connection_accept(): + assert ( + execute_query_native( + instance, + "SELECT 1", + config_connection_accept.format(ip_address=f"{instance.ip_address}"), + ) + == "1\n" + ) diff --git a/tests/integration/test_disk_configuration/test.py b/tests/integration/test_disk_configuration/test.py index c003ff85755..afc5303298c 100644 --- a/tests/integration/test_disk_configuration/test.py +++ b/tests/integration/test_disk_configuration/test.py @@ -208,13 +208,21 @@ def test_merge_tree_custom_disk_setting(start_cluster): secret_access_key='minio123'); """ ) - count = len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + + 
list1 = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) + count1 = len(list1) + + node1.query(f"INSERT INTO {TABLE_NAME}_3 SELECT number FROM numbers(100)") assert int(node1.query(f"SELECT count() FROM {TABLE_NAME}_3")) == 100 - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == count - ) + + list2 = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) + count2 = len(list2) + + if count1 != count2: + print("list1: ", list1) + print("list2: ", list2) + + assert count1 == count2 assert ( len(list(minio.list_objects(cluster.minio_bucket, "data2/", recursive=True))) > 0 diff --git a/tests/integration/test_force_deduplication/test.py b/tests/integration/test_force_deduplication/test.py index 87b2c45bbc5..14c11bc8500 100644 --- a/tests/integration/test_force_deduplication/test.py +++ b/tests/integration/test_force_deduplication/test.py @@ -29,6 +29,8 @@ def get_counts(): def test_basic(start_cluster): + old_src, old_a, old_b, old_c = 0, 0, 0, 0 + node.query( """ CREATE TABLE test (A Int64) ENGINE = ReplicatedMergeTree ('/clickhouse/test/tables/test','1') ORDER BY tuple(); @@ -39,6 +41,15 @@ def test_basic(start_cluster): INSERT INTO test values(999); """ ) + + src, a, b, c = get_counts() + assert src == old_src + 1 + assert a == old_a + 2 + assert b == old_b + 2 + assert c == old_c + 2 + old_src, old_a, old_b, old_c = src, a, b, c + + # this insert fails on test_mv_b due to partitioning by A with pytest.raises(QueryRuntimeException): node.query( """ @@ -46,22 +57,23 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(10); """ ) + src, a, b, c = get_counts() + assert src == old_src + 10 + assert a == old_a + 10 + assert b == old_b + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c - old_src, old_a, old_b, old_c = get_counts() - # number of rows in test_mv_a and test_mv_c depends on order of inserts into views - assert old_src == 11 - assert old_a in (1, 11) - assert old_b == 1 - assert old_c in (1, 11) - + # deduplication only for src table node.query("INSERT INTO test SELECT number FROM numbers(10)") src, a, b, c = get_counts() - # no changes because of deduplication in source table assert src == old_src - assert a == old_a - assert b == old_b - assert c == old_c + assert a == old_a + 10 + assert b == old_b + 10 + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for MV tables does not work, because previous inserts have not written their deduplication tokens to the log due to `deduplicate_blocks_in_dependent_materialized_views = 0`.
node.query( """ SET deduplicate_blocks_in_dependent_materialized_views = 1; @@ -69,11 +81,27 @@ """ ) src, a, b, c = get_counts() - assert src == 11 - assert a == old_a + 10 # first insert could be succesfull with disabled dedup - assert b == 11 + assert src == old_src + assert a == old_a + 10 + assert b == old_b + 10 assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for all the tables + node.query( + """ + SET deduplicate_blocks_in_dependent_materialized_views = 1; + INSERT INTO test SELECT number FROM numbers(10); + """ + ) + src, a, b, c = get_counts() + assert src == old_src + assert a == old_a + assert b == old_b + assert c == old_c + old_src, old_a, old_b, old_c = src, a, b, c + + # this insert fails on test_mv_b due to partitioning by A; it is unique data which is not deduplicated with pytest.raises(QueryRuntimeException): node.query( """ @@ -82,16 +110,23 @@ INSERT INTO test SELECT number FROM numbers(100,10); """ ) + src, a, b, c = get_counts() + assert src == old_src + 10 + assert a == old_a + 10 + assert b == old_b + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for all tables except test_mv_b. For test_mv_b it is unique data which is not deduplicated due to the exception at the previous insert node.query( """ SET deduplicate_blocks_in_dependent_materialized_views = 1; INSERT INTO test SELECT number FROM numbers(100,10); """ ) - src, a, b, c = get_counts() - assert src == 21 - assert a == old_a + 20 - assert b == 21 - assert c == old_c + 20 + assert src == old_src + assert a == old_a + assert b == old_b + 10 + assert c == old_c + old_src, old_a, old_b, old_c = src, a, b, c diff --git a/tests/integration/test_host_resolver_fail_count/configs/config.d/cluster.xml b/tests/integration/test_host_resolver_fail_count/configs/config.d/cluster.xml deleted file mode 100644 index bde62b82719..00000000000 --- a/tests/integration/test_host_resolver_fail_count/configs/config.d/cluster.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - 5 - 5 - 5 - 5 - 5 - 5 - - - \ No newline at end of file diff --git a/tests/integration/test_host_resolver_fail_count/configs/config.d/s3.xml b/tests/integration/test_host_resolver_fail_count/configs/config.d/s3.xml deleted file mode 100644 index 94ac83b32ac..00000000000 --- a/tests/integration/test_host_resolver_fail_count/configs/config.d/s3.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - s3 - http://minio1:9001/root/data/ - minio - minio123 - - - - - -

- s3 -
- - - - - diff --git a/tests/integration/test_host_resolver_fail_count/test_case.py b/tests/integration/test_host_resolver_fail_count/test_case.py deleted file mode 100644 index d25681f0781..00000000000 --- a/tests/integration/test_host_resolver_fail_count/test_case.py +++ /dev/null @@ -1,126 +0,0 @@ -"""Test Interserver responses on configured IP.""" - -import pytest -import time -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) - -node = cluster.add_instance( - "node", - main_configs=["configs/config.d/cluster.xml", "configs/config.d/s3.xml"], - with_minio=True, -) - - -@pytest.fixture(scope="module") -def start_cluster(): - try: - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -# The same value as in ClickHouse, this can't be confugured via config now -DEFAULT_RESOLVE_TIME_HISTORY_SECONDS = 2 * 60 - - -def test_host_resolver(start_cluster): - minio_ip = cluster.get_instance_ip("minio1") - - # drop DNS cache - node.set_hosts( - [ - (minio_ip, "minio1"), - (node.ip_address, "minio1"), # no answer on 9001 port on this IP - ] - ) - - node.query("SYSTEM DROP DNS CACHE") - node.query("SYSTEM DROP CONNECTIONS CACHE") - - node.query( - """ - CREATE TABLE test (key UInt32, value UInt32) - Engine=MergeTree() - ORDER BY key PARTITION BY key - SETTINGS storage_policy='s3' - """ - ) - - initial_fails = "0\n" - k = 0 - limit = 100 - while initial_fails == "0\n": - node.query( - f""" - INSERT INTO test VALUES (0,{k}) - """ - ) - # HostResolver chooses IP randomly, so on single call can choose worked ID - initial_fails = node.query( - "SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'" - ) - k += 1 - if k >= limit: - # Dead IP was not choosen for 100 iteration. - # This is not expected, but not an error actually. - # And test should be stopped. - return - - # initial_fails can be more than 1 if clickhouse does something in several parallel threads - - for j in range(10): - for i in range(10): - node.query( - f""" - INSERT INTO test VALUES ({i+1},{j+1}) - """ - ) - fails = node.query( - "SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'" - ) - assert fails == initial_fails - - # Check that clickhouse tries to recheck IP after 2 minutes - time.sleep(DEFAULT_RESOLVE_TIME_HISTORY_SECONDS) - - intermediate_fails = initial_fails - limit = k + 100 - while intermediate_fails == initial_fails: - node.query( - f""" - INSERT INTO test VALUES (101,{k}) - """ - ) - intermediate_fails = node.query( - "SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'" - ) - k += 1 - if k >= limit: - # Dead IP was not choosen for 100 iteration. - # This is not expected, but not an error actually. - # And test should be stopped. 
- return - - # After another 2 minutes shoudl not be new fails, next retry after 4 minutes - time.sleep(DEFAULT_RESOLVE_TIME_HISTORY_SECONDS) - - initial_fails = intermediate_fails - limit = k + 100 - while intermediate_fails == initial_fails: - node.query( - f""" - INSERT INTO test VALUES (102,{k}) - """ - ) - intermediate_fails = node.query( - "SELECT value FROM system.events WHERE event LIKE 'AddressesMarkedAsFailed'" - ) - k += 1 - if k >= limit: - break - - assert k == limit diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index d7b4230d872..31316af7b1e 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -104,3 +104,24 @@ def test_keeper_map_without_zk(started_cluster): node.query("DETACH TABLE test_keeper_map_without_zk") client.stop() + + +def test_keeper_map_with_failed_drop(started_cluster): + run_query( + "CREATE TABLE test_keeper_map_with_failed_drop (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_with_failed_drop') PRIMARY KEY(key);" + ) + + run_query("INSERT INTO test_keeper_map_with_failed_drop VALUES (1, 11)") + run_query("SYSTEM ENABLE FAILPOINT keepermap_fail_drop_data") + node.query("DROP TABLE test_keeper_map_with_failed_drop SYNC") + + zk_client = get_genuine_zk() + assert ( + zk_client.get("/test_keeper_map/test_keeper_map_with_failed_drop/data") + is not None + ) + + run_query("SYSTEM DISABLE FAILPOINT keepermap_fail_drop_data") + run_query( + "CREATE TABLE test_keeper_map_with_failed_drop_another (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_with_failed_drop') PRIMARY KEY(key);" + ) diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index b8e67551d79..c2469d6c524 100644 --- a/tests/integration/test_lost_part/test.py +++ b/tests/integration/test_lost_part/test.py @@ -266,7 +266,7 @@ def test_lost_last_part(start_cluster): "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} ) - partition_id = node1.query("select partitionId('x')").strip() + partition_id = node1.query("select partitionID('x')").strip() remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0") # other way to detect broken parts diff --git a/tests/integration/test_mask_sensitive_info/configs/users.xml b/tests/integration/test_mask_sensitive_info/configs/users.xml index f129a5bb3e3..f767216e907 100644 --- a/tests/integration/test_mask_sensitive_info/configs/users.xml +++ b/tests/integration/test_mask_sensitive_info/configs/users.xml @@ -2,6 +2,7 @@ 5 + 5 diff --git a/tests/integration/test_host_resolver_fail_count/__init__.py b/tests/integration/test_memory_limit/__init__.py similarity index 100% rename from tests/integration/test_host_resolver_fail_count/__init__.py rename to tests/integration/test_memory_limit/__init__.py diff --git a/tests/integration/test_memory_limit/configs/async_metrics_no.xml b/tests/integration/test_memory_limit/configs/async_metrics_no.xml new file mode 100644 index 00000000000..96cae3bf387 --- /dev/null +++ b/tests/integration/test_memory_limit/configs/async_metrics_no.xml @@ -0,0 +1,21 @@ + + + 86400 + + + + + + + + + + + + + + + + + + diff --git a/tests/integration/test_memory_limit/test.py b/tests/integration/test_memory_limit/test.py new file mode 100644 index 00000000000..6d6745711da --- /dev/null +++ b/tests/integration/test_memory_limit/test.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +import logging +import time +import pytest +from helpers.cluster import 
ClickHouseCluster +from multiprocessing.dummy import Pool + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=[ + "configs/async_metrics_no.xml", + ], + mem_limit="4g", + env_variables={"MALLOC_CONF": "dirty_decay_ms:0"}, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_multiple_queries(): + if node.is_built_with_sanitizer(): + return + + p = Pool(15) + + def run_query(node): + try: + node.query("SELECT * FROM system.numbers GROUP BY number") + except Exception as ex: + print("Exception", ex) + raise ex + + tasks = [] + for i in range(30): + tasks.append(p.apply_async(run_query, (node,))) + time.sleep(i * 0.1) + + for task in tasks: + try: + task.get() + except Exception as ex: + print("Exception", ex) + + # test that we didn't kill the server + node.query("SELECT 1") diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index 756a1e1996c..3af88759e82 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -43,15 +43,10 @@ def started_cluster(): config = """ - none - + strict {certificateFile} {privateKeyFile} {caConfig} - - - AcceptCertificateHandler - """ diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index b131d8fe51c..5948954ff5f 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -908,6 +908,63 @@ def test_mysql_point(started_cluster): conn.close() +def test_joins(started_cluster): + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) + drop_mysql_table(conn, "test_joins_mysql_users") + with conn.cursor() as cursor: + cursor.execute( + "CREATE TABLE clickhouse.test_joins_mysql_users (id INT NOT NULL, name varchar(50) NOT NULL, created TIMESTAMP, PRIMARY KEY (`id`)) ENGINE=InnoDB;" + ) + cursor.execute( + f"INSERT INTO clickhouse.test_joins_mysql_users VALUES (469722, 'user@example.com', '2019-08-30 07:55:01')" + ) + + drop_mysql_table(conn, "test_joins_mysql_tickets") + with conn.cursor() as cursor: + cursor.execute( + "CREATE TABLE clickhouse.test_joins_mysql_tickets (id INT NOT NULL, subject varchar(50), created TIMESTAMP, creator INT NOT NULL, PRIMARY KEY (`id`)) ENGINE=InnoDB;" + ) + cursor.execute( + f"INSERT INTO clickhouse.test_joins_mysql_tickets VALUES (281607, 'Feedback', '2024-06-25 12:09:41', 469722)" + ) + + conn.commit() + + node1.query( + """ + CREATE TABLE test_joins_table_users + ( + `id` Int32, + `Name` String, + `Created` Nullable(DateTime) + ) + ENGINE = MySQL('mysql80:3306', 'clickhouse', 'test_joins_mysql_users', 'root', 'clickhouse'); + """ + ) + + node1.query( + """ + CREATE TABLE test_joins_table_tickets + ( + `id` Int32, + `Subject` Nullable(String), + `Created` Nullable(DateTime), + `Creator` Int32 + ) + ENGINE = MySQL('mysql80:3306', 'clickhouse', 'test_joins_mysql_tickets', 'root', 'clickhouse'); + """ + ) + + node1.query( + """ + SELECT test_joins_table_tickets.id, Subject, test_joins_table_tickets.Created, Name + FROM test_joins_table_tickets + LEFT JOIN test_joins_table_users ON test_joins_table_tickets.Creator = test_joins_table_users.id + WHERE test_joins_table_tickets.Created = '2024-06-25 12:09:41' + """ + ) == "281607\tFeedback\t2024-06-25 12:09:41\tuser@example.com\n" + + if __name__ == "__main__": with 
contextmanager(started_cluster)() as cluster: for name, instance in list(cluster.instances.items()): diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index d9f3a9917ab..12823f1f72d 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -834,6 +834,60 @@ def test_literal_escaping(started_cluster): cursor.execute(f"DROP TABLE escaping") +def test_filter_pushdown(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + cursor.execute("CREATE SCHEMA test_filter_pushdown") + cursor.execute( + "CREATE TABLE test_filter_pushdown.test_table (id integer, value integer)" + ) + cursor.execute( + "INSERT INTO test_filter_pushdown.test_table VALUES (1, 10), (1, 110), (2, 0), (3, 33), (4, 0)" + ) + + node1.query( + """ + CREATE TABLE test_filter_pushdown_pg_table (id UInt32, value UInt32) + ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test_table', 'postgres', 'mysecretpassword', 'test_filter_pushdown'); + """ + ) + + node1.query( + """ + CREATE TABLE test_filter_pushdown_local_table (id UInt32, value UInt32) ENGINE Memory AS SELECT * FROM test_filter_pushdown_pg_table + """ + ) + + node1.query( + "CREATE TABLE ch_table (id UInt32, pg_id UInt32) ENGINE MergeTree ORDER BY id" + ) + node1.query("INSERT INTO ch_table VALUES (1, 1), (2, 2), (3, 1), (4, 2), (5, 999)") + + def compare_results(query, **kwargs): + result1 = node1.query( + query.format(pg_table="test_filter_pushdown_pg_table", **kwargs) + ) + result2 = node1.query( + query.format(pg_table="test_filter_pushdown_local_table", **kwargs) + ) + assert result1 == result2 + + for kind in ["INNER", "LEFT", "RIGHT", "FULL", "ANY LEFT", "SEMI RIGHT"]: + for value in [0, 10]: + compare_results( + "SELECT * FROM ch_table {kind} JOIN {pg_table} as p ON ch_table.pg_id = p.id WHERE value = {value} ORDER BY ALL", + kind=kind, + value=value, + ) + + compare_results( + "SELECT * FROM {pg_table} as p {kind} JOIN ch_table ON ch_table.pg_id = p.id WHERE value = {value} ORDER BY ALL", + kind=kind, + value=value, + ) + + cursor.execute("DROP SCHEMA test_filter_pushdown CASCADE") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index f885a3507ac..bc255b1964e 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -2220,13 +2220,11 @@ def test_rabbitmq_commit_on_block_write(rabbitmq_cluster): def test_rabbitmq_no_connection_at_startup_1(rabbitmq_cluster): - # no connection when table is initialized - rabbitmq_cluster.pause_container("rabbitmq1") - instance.query_and_get_error( + error = instance.query_and_get_error( """ CREATE TABLE test.cs (key UInt64, value UInt64) ENGINE = RabbitMQ - SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + SETTINGS rabbitmq_host_port = 'no_connection_at_startup:5672', rabbitmq_exchange_name = 'cs', rabbitmq_format = 'JSONEachRow', rabbitmq_flush_interval_ms=1000, @@ -2234,7 +2232,7 @@ def test_rabbitmq_no_connection_at_startup_1(rabbitmq_cluster): rabbitmq_row_delimiter = '\\n'; """ ) - rabbitmq_cluster.unpause_container("rabbitmq1") + assert "CANNOT_CONNECT_RABBITMQ" in error def test_rabbitmq_no_connection_at_startup_2(rabbitmq_cluster): diff --git a/tests/queries/0_stateless/02805_distributed_queries_timeouts.reference 
b/tests/integration/test_system_detached_tables/__init__.py similarity index 100% rename from tests/queries/0_stateless/02805_distributed_queries_timeouts.reference rename to tests/integration/test_system_detached_tables/__init__.py diff --git a/tests/integration/test_system_detached_tables/configs/remote_servers.xml b/tests/integration/test_system_detached_tables/configs/remote_servers.xml new file mode 100644 index 00000000000..8b579724282 --- /dev/null +++ b/tests/integration/test_system_detached_tables/configs/remote_servers.xml @@ -0,0 +1,13 @@ + + + + + true + + replica1 + 9000 + + + + + \ No newline at end of file diff --git a/tests/integration/test_system_detached_tables/test.py b/tests/integration/test_system_detached_tables/test.py new file mode 100644 index 00000000000..2eb870efcbc --- /dev/null +++ b/tests/integration/test_system_detached_tables/test.py @@ -0,0 +1,112 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "replica1", + with_zookeeper=True, + main_configs=["configs/remote_servers.xml"], + macros={"replica": "replica1"}, + stay_alive=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.mark.parametrize( + "db_name,db_engine,table_engine,table_engine_params", + [ + pytest.param( + "test_db_atomic", + "Atomic", + "MergeTree", + "ORDER BY n", + id="Atomic db with MergeTree table", + ), + pytest.param( + "test_db_lazy", + "Lazy(60)", + "Log", + "", + id="Lazy db with Log table", + ), + pytest.param( + "test_db_repl", + "Replicated('/clickhouse/tables/test_table','shard1', 'replica1')", + "ReplicatedMergeTree", + "ORDER BY n", + id="Replicated db with ReplicatedMergeTree table", + ), + ], +) +def test_system_detached_tables( + start_cluster, db_name, db_engine, table_engine, table_engine_params +): + node.query(f"CREATE DATABASE IF NOT EXISTS {db_name} ENGINE={db_engine};") + + node.query( + f"CREATE TABLE {db_name}.test_table (n Int64) ENGINE={table_engine} {table_engine_params};" + ) + node.query( + f"CREATE TABLE {db_name}.test_table_perm (n Int64) ENGINE={table_engine} {table_engine_params};" + ) + + test_table_uuid = node.query( + "SELECT uuid FROM system.tables WHERE table='test_table'" + ).rstrip("\n") + test_table_metadata_path = node.query( + "SELECT metadata_path FROM system.tables WHERE table='test_table'" + ).rstrip("\n") + + test_table_perm_uuid = node.query( + "SELECT uuid FROM system.tables WHERE table='test_table_perm'" + ).rstrip("\n") + test_table_perm_metadata_path = node.query( + "SELECT metadata_path FROM system.tables WHERE table='test_table_perm'" + ).rstrip("\n") + + assert "" == node.query( + f"SELECT * FROM system.detached_tables WHERE database='{db_name}'" + ) + + node.query( + f"SET database_replicated_always_detach_permanently=1; DETACH TABLE {db_name}.test_table" + ) + node.query(f"DETACH TABLE {db_name}.test_table_perm PERMANENTLY") + + querry = f"SELECT database, table, is_permanently, uuid, metadata_path FROM system.detached_tables WHERE database='{db_name}' FORMAT Values" + result = node.query(querry) + + if db_engine.startswith("Repl"): + expected_before_restart = f"('{db_name}','test_table',1,'{test_table_uuid}','{test_table_metadata_path}'),('{db_name}','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" + else: + expected_before_restart = 
f"('{db_name}','test_table',0,'{test_table_uuid}','{test_table_metadata_path}'),('{db_name}','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" + + assert result == expected_before_restart + + if db_engine.startswith("Lazy"): + return + + node.restart_clickhouse() + + if db_engine.startswith("Repl"): + expected_after_restart = expected_before_restart + else: + expected_after_restart = f"('{db_name}','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" + + result = node.query(querry) + assert result == expected_after_restart + + node.restart_clickhouse() + + result = node.query(querry) + assert result == expected_after_restart + + node.query(f"DROP DATABASE {db_name}") diff --git a/tests/integration/test_table_db_num_limit/test.py b/tests/integration/test_table_db_num_limit/test.py index aa8030b077c..b7f9d7c0b96 100644 --- a/tests/integration/test_table_db_num_limit/test.py +++ b/tests/integration/test_table_db_num_limit/test.py @@ -4,9 +4,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance( - "node1", main_configs=["config/config.xml"], with_zookeeper=True -) +node = cluster.add_instance("node", main_configs=["config/config.xml"]) @pytest.fixture(scope="module") @@ -21,23 +19,28 @@ def started_cluster(): def test_table_db_limit(started_cluster): - for i in range(10): - node1.query("create database db{}".format(i)) + # By the way, default database already exists. + for i in range(9): + node.query("create database db{}".format(i)) with pytest.raises(QueryRuntimeException) as exp_info: - node1.query("create database db_exp".format(i)) + node.query("create database db_exp".format(i)) assert "TOO_MANY_DATABASES" in str(exp_info) for i in range(10): - node1.query("create table t{} (a Int32) Engine = Log".format(i)) + node.query("create table t{} (a Int32) Engine = Log".format(i)) + + # This checks that system tables are not accounted in the number of tables. 
+ node.query("system flush logs") - node1.query("system flush logs") for i in range(10): - node1.query("drop table t{}".format(i)) + node.query("drop table t{}".format(i)) + for i in range(10): - node1.query("create table t{} (a Int32) Engine = Log".format(i)) + node.query("create table t{} (a Int32) Engine = Log".format(i)) with pytest.raises(QueryRuntimeException) as exp_info: - node1.query("create table default.tx (a Int32) Engine = Log") + node.query("create table default.tx (a Int32) Engine = Log") + assert "TOO_MANY_TABLES" in str(exp_info) diff --git a/tests/integration/test_user_valid_until/test.py b/tests/integration/test_user_valid_until/test.py index d6d5bf8b18e..39ca5997067 100644 --- a/tests/integration/test_user_valid_until/test.py +++ b/tests/integration/test_user_valid_until/test.py @@ -5,7 +5,7 @@ from time import sleep from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node") +node = cluster.add_instance("node", stay_alive=True) @pytest.fixture(scope="module") @@ -84,3 +84,22 @@ def test_details(started_cluster): node.query("SHOW CREATE USER user_details_time_only") == f"CREATE USER user_details_time_only VALID UNTIL \\'{until_year}-01-01 22:03:40\\'\n" ) + + +def test_restart(started_cluster): + node.query("CREATE USER user_restart VALID UNTIL '06/11/2010 08:03:20 Z+3'") + + assert ( + node.query("SHOW CREATE USER user_restart") + == "CREATE USER user_restart VALID UNTIL \\'2010-11-06 05:03:20\\'\n" + ) + + node.restart_clickhouse() + + assert ( + node.query("SHOW CREATE USER user_restart") + == "CREATE USER user_restart VALID UNTIL \\'2010-11-06 05:03:20\\'\n" + ) + + error = "Authentication failed" + assert error in node.query_and_get_error("SELECT 1", user="user_restart") diff --git a/tests/performance/final_big_column.xml b/tests/performance/final_big_column.xml index 1fd586d2d90..5225b3d7ad4 100644 --- a/tests/performance/final_big_column.xml +++ b/tests/performance/final_big_column.xml @@ -10,8 +10,8 @@ PARTITION BY toYYYYMM(d) ORDER BY key - INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), 2*number, randomPrintableASCII(1000) FROM numbers(5000000) - INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), 2*number+1, randomPrintableASCII(1000) FROM numbers(5000000) + INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), 2*number, randomPrintableASCII(1000) FROM numbers(2500000) + INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), 2*number+1, randomPrintableASCII(1000) FROM numbers(2500000) SELECT * FROM optimized_select_final FINAL FORMAT Null SETTINGS max_threads = 8 SELECT * FROM optimized_select_final FINAL WHERE key % 10 = 0 FORMAT Null diff --git a/tests/performance/function_tokens.xml b/tests/performance/function_tokens.xml index 1ff56323d62..bc2bc71a933 100644 --- a/tests/performance/function_tokens.xml +++ b/tests/performance/function_tokens.xml @@ -1,5 +1,5 @@ with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByChar(' ', materialize(s)) as w from numbers(1000000) - with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' 
as s select splitByRegexp(' ', materialize(s)) as w from numbers(1000000) - with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp('\s+', materialize(s)) as w from numbers(100000) + with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp(' ', materialize(s)) as w from numbers(200000) + with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp('\s+', materialize(s)) as w from numbers(20000) diff --git a/tests/performance/polymorphic_parts_s.xml b/tests/performance/polymorphic_parts_s.xml index b4dd87a7ae3..5fe1ffffe1d 100644 --- a/tests/performance/polymorphic_parts_s.xml +++ b/tests/performance/polymorphic_parts_s.xml @@ -24,10 +24,10 @@ 1
- - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(1000) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100) + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(50) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(500) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(50) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact diff --git a/tests/performance/scripts/report.py b/tests/performance/scripts/report.py index c2bc773bd54..e45d709ca2c 100755 --- a/tests/performance/scripts/report.py +++ b/tests/performance/scripts/report.py @@ -555,7 +555,7 @@ if args.report == "main": "Total client time for measured query runs, s", # 2 "Queries", # 3 "Longest query, total for measured runs, s", # 4 - "Wall clock time per query, s", # 5 + "Average query wall clock time, s", # 5 "Shortest query, total for measured runs, s", # 6 "", # Runs #7 ] diff --git a/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference b/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference index 63b8a9d14fc..fc77ed8a241 100644 --- a/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference +++ b/tests/queries/0_stateless/00257_shard_no_aggregates_and_constant_keys.reference @@ -8,13 +8,13 @@ 40 41 -0 +41 2 42 2 42 43 -0 +43 11 11 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference index adf6abb7298..9c9281dc7e4 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference @@ -1,7 +1,7 @@ 2 3 -2 +3 3 1 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql index d3c4da86b41..51e6a513608 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql @@ -29,7 +29,7 @@ INSERT INTO without_deduplication VALUES (43); SELECT count() FROM with_deduplication; SELECT count() FROM without_deduplication; --- Implicit insert isn't deduplicated +-- Implicit insert isn't deduplicated, because deduplicate_blocks_in_dependent_materialized_views = 0 by default SELECT ''; SELECT countMerge(cnt) FROM with_deduplication_mv; SELECT countMerge(cnt) FROM without_deduplication_mv; diff --git a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh index 1fb219108da..8f7d19028b0 100755 --- a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh +++ b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh @@ -36,8 +36,8 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE c" echo ${CLICKHOUSE_CLIENT} --query "CREATE TABLE root (d UInt64) ENGINE = Null" ${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW d (d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/d', '1') ORDER BY d AS SELECT * FROM root" -${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; -${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; +${CLICKHOUSE_CLIENT} --query 
"INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; ${CLICKHOUSE_CLIENT} --query "SELECT * FROM d"; ${CLICKHOUSE_CLIENT} --query "DROP TABLE root" ${CLICKHOUSE_CLIENT} --query "DROP TABLE d" diff --git a/tests/queries/0_stateless/00719_parallel_ddl_db.sh b/tests/queries/0_stateless/00719_parallel_ddl_db.sh index ceba24df7e4..ac4e4eae4f2 100755 --- a/tests/queries/0_stateless/00719_parallel_ddl_db.sh +++ b/tests/queries/0_stateless/00719_parallel_ddl_db.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -DB_SUFFIX=$RANDOM +DB_SUFFIX=${RANDOM}${RANDOM}${RANDOM}${RANDOM} ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl_${DB_SUFFIX}" function query() diff --git a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh index ae728c8d10d..f0bc52ee356 100755 --- a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh +++ b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: deadlock, no-debug +# Tags: deadlock, no-debug, no-parallel # NOTE: database = $CLICKHOUSE_DATABASE is unwanted diff --git a/tests/queries/0_stateless/00927_asof_join_correct_bt.reference b/tests/queries/0_stateless/00927_asof_join_correct_bt.reference index bb199d0159a..28c48d2e290 100644 --- a/tests/queries/0_stateless/00927_asof_join_correct_bt.reference +++ b/tests/queries/0_stateless/00927_asof_join_correct_bt.reference @@ -1,13 +1,36 @@ +-- { echoOn } +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 1 104 4 4 104 1 1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 1 104 4 4 104 1 1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 +SET join_algorithm = 'full_sorting_merge'; +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 diff --git a/tests/queries/0_stateless/00927_asof_join_correct_bt.sql b/tests/queries/0_stateless/00927_asof_join_correct_bt.sql index 281a81d51c0..d796b62d3b3 100644 --- a/tests/queries/0_stateless/00927_asof_join_correct_bt.sql +++ b/tests/queries/0_stateless/00927_asof_join_correct_bt.sql @@ -4,20 +4,29 @@ DROP TABLE IF EXISTS B; CREATE TABLE A(k UInt32, t UInt32, a UInt64) ENGINE = MergeTree() ORDER BY (k, t); INSERT INTO A(k,t,a) VALUES (1,101,1),(1,102,2),(1,103,3),(1,104,4),(1,105,5); -CREATE TABLE B(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); -SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A 
ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); -DROP TABLE B; +CREATE TABLE B1(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B1(k,t,b) VALUES (1,102,2), (1,104,4); +CREATE TABLE B2(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B2(k,t,b) VALUES (1,102,2), (1,104,4); -CREATE TABLE B(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); -SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); -DROP TABLE B; +CREATE TABLE B3(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B3(k,t,b) VALUES (1,102,2), (1,104,4); -CREATE TABLE B(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); -SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); -DROP TABLE B; +-- { echoOn } +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); + +SET join_algorithm = 'full_sorting_merge'; +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); + +-- { echoOff } + +DROP TABLE B1; +DROP TABLE B2; +DROP TABLE B3; DROP TABLE A; diff --git a/tests/queries/0_stateless/00927_asof_join_long.reference b/tests/queries/0_stateless/00927_asof_join_long.reference index d4f015c68e4..ec40d2bc463 100644 --- a/tests/queries/0_stateless/00927_asof_join_long.reference +++ b/tests/queries/0_stateless/00927_asof_join_long.reference @@ -1 +1,2 @@ 3000000 +3000000 diff --git a/tests/queries/0_stateless/00927_asof_join_long.sql b/tests/queries/0_stateless/00927_asof_join_long.sql index c03a06d48d4..7a73875e93e 100644 --- a/tests/queries/0_stateless/00927_asof_join_long.sql +++ b/tests/queries/0_stateless/00927_asof_join_long.sql @@ -2,15 +2,28 @@ DROP TABLE IF EXISTS tvs; +-- to use different algorithms for in subquery +SET allow_experimental_analyzer = 1; + CREATE TABLE tvs(k UInt32, t UInt32, tv UInt64) ENGINE = Memory; INSERT INTO tvs(k,t,tv) SELECT k, t, t FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys -CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times; +CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times +SETTINGS join_algorithm = 'hash'; SELECT SUM(trades.price - tvs.tv) FROM (SELECT k, t, t as price FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys - CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times) trades + CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times + SETTINGS join_algorithm = 'hash') trades ASOF LEFT JOIN tvs USING(k,t); +SELECT SUM(trades.price - tvs.tv) FROM +(SELECT k, t, t as price + FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys + CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times + SETTINGS join_algorithm = 'hash') trades +ASOF LEFT JOIN tvs USING(k,t) +SETTINGS join_algorithm = 'full_sorting_merge'; + DROP TABLE tvs; diff --git a/tests/queries/0_stateless/00927_asof_join_noninclusive.reference 
b/tests/queries/0_stateless/00927_asof_join_noninclusive.reference index fe2844a2a43..d856372fb4a 100644 --- a/tests/queries/0_stateless/00927_asof_join_noninclusive.reference +++ b/tests/queries/0_stateless/00927_asof_join_noninclusive.reference @@ -27,3 +27,32 @@ 2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 +1 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0 +1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1 +1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1 +2 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0 +2 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0 +2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 +3 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0 +3 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0 +3 1970-01-01 00:00:03 3 0 1970-01-01 00:00:00 0 +3 1970-01-01 00:00:04 4 0 1970-01-01 00:00:00 0 +3 1970-01-01 00:00:05 5 0 1970-01-01 00:00:00 0 +1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1 +1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1 +2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 +1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1 +1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1 +2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 diff --git a/tests/queries/0_stateless/00927_asof_join_noninclusive.sql b/tests/queries/0_stateless/00927_asof_join_noninclusive.sql index 5f15f3b593d..3cc99df4462 100644 --- a/tests/queries/0_stateless/00927_asof_join_noninclusive.sql +++ b/tests/queries/0_stateless/00927_asof_join_noninclusive.sql @@ -11,9 +11,12 @@ INSERT INTO B(k,t,b) VALUES (1,2,2),(1,4,4); INSERT INTO B(k,t,b) VALUES (2,3,3); SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); - SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t); +SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t); +SET join_algorithm = 'full_sorting_merge'; +SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t); SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t); DROP TABLE A; diff --git a/tests/queries/0_stateless/00927_asof_join_other_types.reference b/tests/queries/0_stateless/00927_asof_join_other_types.reference index 80c85ec1ae3..ddbc24ff925 100644 --- a/tests/queries/0_stateless/00927_asof_join_other_types.reference +++ b/tests/queries/0_stateless/00927_asof_join_other_types.reference @@ -1,27 +1,72 @@ +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 
3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- 2 1970-01-01 02:00:01 1 0 2 1970-01-01 02:00:03 3 3 2 1970-01-01 02:00:05 5 3 +- +2 1970-01-01 02:00:01 1 0 +2 1970-01-01 02:00:03 3 3 +2 1970-01-01 02:00:05 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1970-01-01 02:00:00.001 1 0 +2 1970-01-01 02:00:00.003 3 3 +2 1970-01-01 02:00:00.005 5 3 +- 2 1970-01-01 02:00:00.001 1 0 2 1970-01-01 02:00:00.003 3 3 2 1970-01-01 02:00:00.005 5 3 diff --git a/tests/queries/0_stateless/00927_asof_join_other_types.sh b/tests/queries/0_stateless/00927_asof_join_other_types.sh deleted file mode 100755 index 10173a3e43f..00000000000 --- a/tests/queries/0_stateless/00927_asof_join_other_types.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash - -set -e - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -for typename in "UInt32" "UInt64" "Float64" "Float32" "DateTime('Asia/Istanbul')" "Decimal32(5)" "Decimal64(5)" "Decimal128(5)" "DateTime64(3, 'Asia/Istanbul')" -do - $CLICKHOUSE_CLIENT -mn <= B.t ORDER BY (A.a, A.t); SELECT count() FROM A ASOF LEFT JOIN B ON A.a == B.b AND B.t <= A.t; SELECT A.a, A.t, B.b, B.t FROM A ASOF INNER JOIN B ON B.t <= A.t AND A.a == B.b ORDER BY (A.a, A.t); @@ -28,5 +36,8 @@ ASOF INNER JOIN (SELECT * FROM B UNION ALL SELECT 1, 3) AS B ON B.t <= A.t AND A WHERE B.t != 3 ORDER BY (A.a, A.t) ; +{% endfor -%} +{% endfor -%} + DROP TABLE A; DROP TABLE B; diff --git a/tests/queries/0_stateless/00997_set_index_array.sql b/tests/queries/0_stateless/00997_set_index_array.sql index d6d27f5a6a0..ed972d1a545 100644 --- a/tests/queries/0_stateless/00997_set_index_array.sql +++ b/tests/queries/0_stateless/00997_set_index_array.sql @@ -12,10 +12,10 @@ ORDER BY (primary_key); INSERT INTO set_array select - toString(intDiv(number, 1000000)) as primary_key, + toString(intDiv(number, 100000)) as primary_key, array(number) as index_array from system.numbers -limit 10000000; +limit 1000000; OPTIMIZE TABLE set_array FINAL; diff --git a/tests/queries/0_stateless/01004_rename_deadlock.sh b/tests/queries/0_stateless/01004_rename_deadlock.sh index f0adf136e94..60d67e1744a 100755 --- a/tests/queries/0_stateless/01004_rename_deadlock.sh +++ b/tests/queries/0_stateless/01004_rename_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: deadlock +# Tags: deadlock, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01005_rwr_shard_deadlock.sh b/tests/queries/0_stateless/01005_rwr_shard_deadlock.sh index ef352606b69..51e8cdbc8e8 100755 --- a/tests/queries/0_stateless/01005_rwr_shard_deadlock.sh +++ b/tests/queries/0_stateless/01005_rwr_shard_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: deadlock, shard +# Tags: deadlock, shard, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01007_r1r2_w_r2r1_deadlock.sh b/tests/queries/0_stateless/01007_r1r2_w_r2r1_deadlock.sh index 9f4b2241732..d602ff6df3a 100755 --- a/tests/queries/0_stateless/01007_r1r2_w_r2r1_deadlock.sh +++ b/tests/queries/0_stateless/01007_r1r2_w_r2r1_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: deadlock +# Tags: deadlock, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # 
shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh b/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh index 3046fcbcd73..e4b3a31b13f 100755 --- a/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh +++ b/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh index 0d57bb25543..59a1e3cb980 100755 --- a/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-fasttest +# Tags: zookeeper, no-fasttest, long, no-parallel set -e diff --git a/tests/queries/0_stateless/01053_ssd_dictionary.sh b/tests/queries/0_stateless/01053_ssd_dictionary.sh index 00e5719a9a9..fdd40940ab5 100755 --- a/tests/queries/0_stateless/01053_ssd_dictionary.sh +++ b/tests/queries/0_stateless/01053_ssd_dictionary.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # Tags: no-parallel, no-fasttest -# Tag no-fasttest: this test mistakenly requires acces to /var/lib/clickhouse -- can't run this locally, disabled +# Tag no-fasttest: this test mistakenly requires access to /var/lib/clickhouse -- can't run this locally, disabled CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -n --query=" DROP DATABASE IF EXISTS 01053_db; - CREATE DATABASE 01053_db Engine = Ordinary; + CREATE DATABASE 01053_db; DROP TABLE IF EXISTS 01053_db.table_for_dict; diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh index 399c9e488a4..382e3279711 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-fasttest +# Tags: long, zookeeper, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index 90128d7a8ad..619b6e91d11 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper +# Tags: race, zookeeper, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference index 1055a67ea5b..0aa1a85f19d 100644 --- a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference +++ b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference @@ -1,3 +1,6 
@@ v1 o1 ['s2','s1'] v1 o2 ['s4'] v2 o3 ['s5','s3'] +v1 o1 ['s2','s1'] +v1 o2 ['s4'] +v2 o3 ['s5','s3'] diff --git a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql index 8a94b6ddd24..652cb35cf2a 100644 --- a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql +++ b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql @@ -16,3 +16,17 @@ GROUP BY ORDER BY visitorId ASC, orderId ASC; + +SELECT + visitorId, + orderId, + groupUniqArray(sessionId) +FROM sessions +ASOF INNER JOIN orders ON (sessions.visitorId = orders.visitorId) AND (sessions.date <= orders.date) +GROUP BY + visitorId, + orderId +ORDER BY + visitorId ASC, + orderId ASC +SETTINGS join_algorithm = 'full_sorting_merge'; diff --git a/tests/queries/0_stateless/01194_http_query_id.sh b/tests/queries/0_stateless/01194_http_query_id.sh index 5aebdc10dfc..42321112185 100755 --- a/tests/queries/0_stateless/01194_http_query_id.sh +++ b/tests/queries/0_stateless/01194_http_query_id.sh @@ -4,14 +4,22 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -url="http://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/?session_id=test_01194" -rnd=$RANDOM +rnd="$CLICKHOUSE_DATABASE" +url="${CLICKHOUSE_URL}&session_id=test_01194_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CURL} -sS "$url&query=SELECT+'test_01194',$rnd,1" > /dev/null ${CLICKHOUSE_CURL} -sS "$url&query=SELECT+'test_01194',$rnd,2" > /dev/null ${CLICKHOUSE_CURL} -sS "$url" --data "SELECT 'test_01194',$rnd,3" > /dev/null ${CLICKHOUSE_CURL} -sS "$url" --data "SELECT 'test_01194',$rnd,4" > /dev/null -${CLICKHOUSE_CURL} -sS "$url" --data "SYSTEM FLUSH LOGS" +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" -${CLICKHOUSE_CURL} -sS "$url&query=SELECT+count(DISTINCT+query_id)+FROM+system.query_log+WHERE+current_database+LIKE+currentDatabase()+AND+query+LIKE+'SELECT+''test_01194'',$rnd%25'" +$CLICKHOUSE_CLIENT -q " + SELECT + count(DISTINCT query_id) + FROM system.query_log + WHERE + current_database = currentDatabase() + AND event_date >= yesterday() + AND query LIKE 'SELECT ''test_01194'',$rnd%' + AND query_id != queryID()" diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index f5f31c4a563..cf1f7054f6c 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -2,13 +2,18 @@ -select 'optimize_trivial_insert_select=0', 'max_insert_threads=0'; -optimize_trivial_insert_select=0 max_insert_threads=0 -insert into testX select number from numbers(10) settings + + + +select 'optimize_trivial_insert_select=0', 'max_insert_threads=0', 'iteration_num='; +optimize_trivial_insert_select=0 max_insert_threads=0 iteration_num= +insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_1', log_queries=1, parallel_view_processing=0, optimize_trivial_insert_select=0, - max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=0; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -19,20 +24,20 @@ select peak_threads_usage from system.query_log where Settings['max_insert_threads'] = '0'; 2 select count() from testX; -10 +200 select count() from testXA; -10 -select count() from testXB; -0 +200 select count() from testXC; -10 -select 'optimize_trivial_insert_select=0', 
'max_insert_threads=16'; -optimize_trivial_insert_select=0 max_insert_threads=16 -insert into testX select number from numbers(10) settings +200 +select 'optimize_trivial_insert_select=0', 'max_insert_threads=5', 'iteration_num='; +optimize_trivial_insert_select=0 max_insert_threads=5 iteration_num= +insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_2', log_queries=1, parallel_view_processing=0, optimize_trivial_insert_select=0, - max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=5; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -40,23 +45,23 @@ select peak_threads_usage from system.query_log where query like '%insert into testX %' and Settings['parallel_view_processing'] = '0' and Settings['optimize_trivial_insert_select'] = '0' and - Settings['max_insert_threads'] = '16'; + Settings['max_insert_threads'] = '5'; 2 select count() from testX; -20 +400 select count() from testXA; -20 -select count() from testXB; -0 +400 select count() from testXC; -20 -select 'optimize_trivial_insert_select=1', 'max_insert_threads=0'; -optimize_trivial_insert_select=1 max_insert_threads=0 -insert into testX select number from numbers(10) settings +400 +select 'optimize_trivial_insert_select=1', 'max_insert_threads=0', 'iteration_num='; +optimize_trivial_insert_select=1 max_insert_threads=0 iteration_num= +insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_3', log_queries=1, parallel_view_processing=0, optimize_trivial_insert_select=1, - max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=0; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -67,20 +72,20 @@ select peak_threads_usage from system.query_log where Settings['max_insert_threads'] = '0'; 2 select count() from testX; -30 +600 select count() from testXA; -30 -select count() from testXB; -0 +600 select count() from testXC; -30 -select 'optimize_trivial_insert_select=1', 'max_insert_threads=16'; -optimize_trivial_insert_select=1 max_insert_threads=16 -insert into testX select number from numbers(10) settings +600 +select 'optimize_trivial_insert_select=1', 'max_insert_threads=5', 'iteration_num='; +optimize_trivial_insert_select=1 max_insert_threads=5 iteration_num= +insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_4', log_queries=1, parallel_view_processing=0, optimize_trivial_insert_select=1, - max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=5; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -88,23 +93,23 @@ select peak_threads_usage from system.query_log where query like '%insert into testX %' and Settings['parallel_view_processing'] = '0' and Settings['optimize_trivial_insert_select'] = '1' and - Settings['max_insert_threads'] = '16'; + Settings['max_insert_threads'] = '5'; 2 select count() from testX; -40 +800 select count() from testXA; -40 -select count() from testXB; -0 +800 select count() from testXC; -40 -select 'optimize_trivial_insert_select=0', 'max_insert_threads=0'; -optimize_trivial_insert_select=0 max_insert_threads=0 -insert into testX select number from numbers(10) settings +800 +select 
'optimize_trivial_insert_select=0', 'max_insert_threads=0', 'iteration_num='; +optimize_trivial_insert_select=0 max_insert_threads=0 iteration_num= +insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_5', log_queries=1, parallel_view_processing=1, optimize_trivial_insert_select=0, - max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=0; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -115,20 +120,20 @@ select peak_threads_usage from system.query_log where Settings['max_insert_threads'] = '0'; 5 select count() from testX; -50 +1000 select count() from testXA; -50 -select count() from testXB; -0 +1000 select count() from testXC; -50 -select 'optimize_trivial_insert_select=0', 'max_insert_threads=16'; -optimize_trivial_insert_select=0 max_insert_threads=16 -insert into testX select number from numbers(10) settings +1000 +select 'optimize_trivial_insert_select=0', 'max_insert_threads=5', 'iteration_num='; +optimize_trivial_insert_select=0 max_insert_threads=5 iteration_num= +insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_6', log_queries=1, parallel_view_processing=1, optimize_trivial_insert_select=0, - max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=5; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -136,23 +141,23 @@ select peak_threads_usage from system.query_log where query like '%insert into testX %' and Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '0' and - Settings['max_insert_threads'] = '16'; -5 + Settings['max_insert_threads'] = '5'; +12 select count() from testX; -60 +1200 select count() from testXA; -60 -select count() from testXB; -0 +1200 select count() from testXC; -60 -select 'optimize_trivial_insert_select=1', 'max_insert_threads=0'; -optimize_trivial_insert_select=1 max_insert_threads=0 -insert into testX select number from numbers(10) settings +1200 +select 'optimize_trivial_insert_select=1', 'max_insert_threads=0', 'iteration_num='; +optimize_trivial_insert_select=1 max_insert_threads=0 iteration_num= +insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_7', log_queries=1, parallel_view_processing=1, optimize_trivial_insert_select=1, - max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=0; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -161,22 +166,22 @@ select peak_threads_usage from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '1' and Settings['max_insert_threads'] = '0'; -5 +2 select count() from testX; -70 +1400 select count() from testXA; -70 -select count() from testXB; -0 +1400 select count() from testXC; -70 -select 'optimize_trivial_insert_select=1', 'max_insert_threads=16'; -optimize_trivial_insert_select=1 max_insert_threads=16 -insert into testX select number from numbers(10) settings +1400 +select 'optimize_trivial_insert_select=1', 'max_insert_threads=5', 'iteration_num='; +optimize_trivial_insert_select=1 max_insert_threads=5 iteration_num= +insert into testX select number from numbers(200) settings 
+ send_logs_level='fatal', + insert_deduplication_token='UT_8', log_queries=1, parallel_view_processing=1, optimize_trivial_insert_select=1, - max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=5; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -184,13 +189,11 @@ select peak_threads_usage from system.query_log where query like '%insert into testX %' and Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '1' and - Settings['max_insert_threads'] = '16'; -5 + Settings['max_insert_threads'] = '5'; +7 select count() from testX; -80 +1600 select count() from testXA; -80 -select count() from testXB; -0 +1600 select count() from testXC; -80 +1600 diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 index 5918035e9c3..6805b8902ec 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 +++ b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 @@ -3,7 +3,13 @@ -- no-parallel: it checks the number of threads, which can be lowered in presence of other queries -- avoid settings randomization by clickhouse-test -set max_threads = 0; +set max_threads = 10; + +-- more blocks to process +set max_block_size = 10; +set min_insert_block_size_rows = 10; + +set materialized_views_ignore_errors = 1; drop table if exists testX; drop table if exists testXA; @@ -16,17 +22,25 @@ create materialized view testXA engine=MergeTree order by tuple() as select slee create materialized view testXB engine=MergeTree order by tuple() as select sleep(0.2), throwIf(A=1) from testX; create materialized view testXC engine=MergeTree order by tuple() as select sleep(0.1) from testX; + +{% set count = namespace(value=0) %} + -- { echoOn } {% for parallel_view_processing in [0, 1] %} {% for optimize_trivial_insert_select in [0, 1] %} -{% for max_insert_threads in [0, 16] %} -select 'optimize_trivial_insert_select={{ optimize_trivial_insert_select }}', 'max_insert_threads={{ max_insert_threads }}'; +{% for max_insert_threads in [0, 5] %} -insert into testX select number from numbers(10) settings +{% set count.value = count.value + 1 %} + +select 'optimize_trivial_insert_select={{ optimize_trivial_insert_select }}', 'max_insert_threads={{ max_insert_threads }}', 'iteration_num={{ iteration_num }}'; + +insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_{{ count.value }}', log_queries=1, parallel_view_processing={{ parallel_view_processing }}, optimize_trivial_insert_select={{ optimize_trivial_insert_select }}, - max_insert_threads={{ max_insert_threads }}; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads={{ max_insert_threads }}; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -38,7 +52,6 @@ select peak_threads_usage from system.query_log where select count() from testX; select count() from testXA; -select count() from testXB; select count() from testXC; {% endfor %} {% endfor %} diff --git a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh index 9a80820dd58..60a65b9a253 100755 --- a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh +++ b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh @@ -6,9 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && 
pwd) . "$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -n --query=" - set allow_deprecated_database_ordinary=1; DROP DATABASE IF EXISTS 01280_db; - CREATE DATABASE 01280_db Engine = Ordinary; + CREATE DATABASE 01280_db; DROP TABLE IF EXISTS 01280_db.table_for_dict; CREATE TABLE 01280_db.table_for_dict ( diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.reference b/tests/queries/0_stateless/01338_long_select_and_alter.reference index c2678e7052e..027109252e1 100644 --- a/tests/queries/0_stateless/01338_long_select_and_alter.reference +++ b/tests/queries/0_stateless/01338_long_select_and_alter.reference @@ -1,3 +1,3 @@ -10 5 -CREATE TABLE default.alter_mt\n(\n `key` UInt64,\n `value` UInt64\n)\nENGINE = MergeTree\nORDER BY key\nSETTINGS index_granularity = 8192 +5 +CREATE TABLE default.alter_mt\n(\n `key` Int64,\n `value` Int64\n)\nENGINE = MergeTree\nORDER BY key\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.sh b/tests/queries/0_stateless/01338_long_select_and_alter.sh index 2b0709162a3..5d2759ac884 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter.sh @@ -7,16 +7,16 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS alter_mt" -$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENGINE=MergeTree() ORDER BY key" +$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key Int64, value String) ENGINE=MergeTree() ORDER BY key" -$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" +$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number - 1 AS x, toString(x) FROM numbers(5)" $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & -# to be sure that select took all required locks +# To be sure that select took all required locks for better test sensitivity, although it isn't guaranteed (then the test will also succeed). 
sleep 2 -$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value UInt64" +$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value Int64" $CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM alter_mt" diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference index b4ed8efab63..65e638bc3a4 100644 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference @@ -1,3 +1,3 @@ -10 5 -CREATE TABLE default.alter_mt\n(\n `key` UInt64,\n `value` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/01338_long_select_and_alter_zookeeper_default/alter_mt\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +5 +CREATE TABLE default.alter_mt\n(\n `key` Int64,\n `value` Int64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/01338_long_select_and_alter_zookeeper_default/alter_mt\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh index 41e0a12f369..593a96a7cc8 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh @@ -7,16 +7,16 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS alter_mt" -$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_mt', '1') ORDER BY key" +$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key Int64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_mt', '1') ORDER BY key" -$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" +$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number - 1 AS x, toString(x) FROM numbers(5)" $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & -# to be sure that select took all required locks +# To be sure that select took all required locks for better test sensitivity, although it isn't guaranteed (then the test will also succeed). 
sleep 2 -$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value UInt64" +$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value Int64" $CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM alter_mt" diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index b81bb75891d..bff85b3e29f 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -30,7 +30,7 @@ $CLICKHOUSE_CLIENT --max_block_size 1 --min_insert_block_size_rows 1 --min_inser for _ in {1..60}; do $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" - [[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 100)) ]] && break; + [[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 10)) ]] && break; sleep 1 done diff --git a/tests/queries/0_stateless/01412_cache_dictionary_race.sh b/tests/queries/0_stateless/01412_cache_dictionary_race.sh index 9aa39652021..36295ca01ea 100755 --- a/tests/queries/0_stateless/01412_cache_dictionary_race.sh +++ b/tests/queries/0_stateless/01412_cache_dictionary_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race +# Tags: race, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh b/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh index 2bfd350ec51..fa9238041b1 100755 --- a/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh +++ b/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race +# Tags: race, no-parallel set -e diff --git a/tests/queries/0_stateless/01475_read_subcolumns.sql b/tests/queries/0_stateless/01475_read_subcolumns.sql index d6eec2f84a1..d387230d9e5 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns.sql +++ b/tests/queries/0_stateless/01475_read_subcolumns.sql @@ -1,4 +1,4 @@ --- Tags: no-object-storage, no-random-settings +-- Tags: no-object-storage, no-random-settings, no-parallel SET use_uncompressed_cache = 0; diff --git a/tests/queries/0_stateless/01487_distributed_in_not_default_db.sql b/tests/queries/0_stateless/01487_distributed_in_not_default_db.sql deleted file mode 100644 index cd027530ac8..00000000000 --- a/tests/queries/0_stateless/01487_distributed_in_not_default_db.sql +++ /dev/null @@ -1,43 +0,0 @@ --- Tags: distributed, no-parallel - -CREATE DATABASE IF NOT EXISTS shard_0; -CREATE DATABASE IF NOT EXISTS shard_1; -CREATE DATABASE IF NOT EXISTS main_01487; -CREATE DATABASE IF NOT EXISTS test_01487; - -USE main_01487; - -DROP TABLE IF EXISTS shard_0.l; -DROP TABLE IF EXISTS shard_1.l; -DROP TABLE IF EXISTS d; -DROP TABLE IF EXISTS t; - -CREATE TABLE shard_0.l (value UInt8) ENGINE = MergeTree ORDER BY value; -CREATE TABLE shard_1.l (value UInt8) ENGINE = MergeTree ORDER BY 
value; -CREATE TABLE t (value UInt8) ENGINE = Memory; - -INSERT INTO shard_0.l VALUES (0); -INSERT INTO shard_1.l VALUES (1); -INSERT INTO t VALUES (0), (1), (2); - -CREATE TABLE d AS t ENGINE = Distributed(test_cluster_two_shards_different_databases, currentDatabase(), t); - -USE test_01487; -DROP DATABASE test_01487; - --- After the default database is dropped QueryAnalysisPass cannot process the following SELECT query. --- That query is invalid on the initiator node. -set allow_experimental_analyzer = 0; - -SELECT * FROM main_01487.d WHERE value IN (SELECT l.value FROM l) ORDER BY value; - -USE main_01487; - -DROP TABLE IF EXISTS shard_0.l; -DROP TABLE IF EXISTS shard_1.l; -DROP TABLE IF EXISTS d; -DROP TABLE IF EXISTS t; - -DROP DATABASE shard_0; -DROP DATABASE shard_1; -DROP DATABASE main_01487; diff --git a/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh b/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh index b8efee89b4a..5a146968a3b 100755 --- a/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh +++ b/tests/queries/0_stateless/01502_long_log_tinylog_deadlock_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: deadlock +# Tags: deadlock, no-parallel set -e diff --git a/tests/queries/0_stateless/01533_multiple_nested.sql b/tests/queries/0_stateless/01533_multiple_nested.sql index 80e9fc7e2fb..4756bd4e919 100644 --- a/tests/queries/0_stateless/01533_multiple_nested.sql +++ b/tests/queries/0_stateless/01533_multiple_nested.sql @@ -1,4 +1,4 @@ --- Tags: no-object-storage, no-random-merge-tree-settings +-- Tags: no-object-storage, no-random-merge-tree-settings, no-parallel -- no-s3 because read FileOpen metric DROP TABLE IF EXISTS nested; diff --git a/tests/queries/0_stateless/01592_long_window_functions1.sql b/tests/queries/0_stateless/01592_long_window_functions1.sql index d2d32e24eaa..671245599cc 100644 --- a/tests/queries/0_stateless/01592_long_window_functions1.sql +++ b/tests/queries/0_stateless/01592_long_window_functions1.sql @@ -8,14 +8,14 @@ drop table if exists stack; set max_insert_threads = 4; create table stack(item_id Int64, brand_id Int64, rack_id Int64, dt DateTime, expiration_dt DateTime, quantity UInt64) -Engine = MergeTree -partition by toYYYYMM(dt) +Engine = MergeTree +partition by toYYYYMM(dt) order by (brand_id, toStartOfHour(dt)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -insert into stack -select number%99991, number%11, number%1111, toDateTime('2020-01-01 00:00:00')+number/100, +insert into stack +select number%99991, number%11, number%1111, toDateTime('2020-01-01 00:00:00')+number/100, toDateTime('2020-02-01 00:00:00')+number/10, intDiv(number,100)+1 -from numbers_mt(10000000); +from numbers_mt(1000000); select '---- arrays ----'; @@ -32,8 +32,8 @@ select '---- window f ----'; select cityHash64( toString( groupArray (tuple(*) ) )) from ( select brand_id, rack_id, quantity from ( select brand_id, rack_id, quantity, row_number() over (partition by brand_id, rack_id order by quantity) rn - from stack ) as t0 - where rn <= 2 + from stack ) as t0 + where rn <= 2 order by brand_id, rack_id, quantity ) t; diff --git a/tests/queries/0_stateless/01748_partition_id_pruning.sql b/tests/queries/0_stateless/01748_partition_id_pruning.sql index b637528bc6c..f492a4a8871 100644 --- a/tests/queries/0_stateless/01748_partition_id_pruning.sql +++ b/tests/queries/0_stateless/01748_partition_id_pruning.sql @@ -8,17 +8,17 @@ insert into x values (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); set 
max_rows_to_read = 3; -select * from x where _partition_id = partitionId(1); +select * from x where _partition_id = partitionID(1); set max_rows_to_read = 5; -- one row for subquery + subquery -select * from x where _partition_id in (select partitionId(number + 1) from numbers(1)); +select * from x where _partition_id in (select partitionID(number + 1) from numbers(1)); -- trivial count optimization test set max_rows_to_read = 2; -- one row for subquery + subquery itself -- TODO: Relax the limits because we might build prepared set twice with _minmax_count_projection set max_rows_to_read = 3; -select count() from x where _partition_id in (select partitionId(number + 1) from numbers(1)); +select count() from x where _partition_id in (select partitionID(number + 1) from numbers(1)); drop table x; diff --git a/tests/queries/0_stateless/01778_mmap_cache_infra.sql b/tests/queries/0_stateless/01778_mmap_cache_infra.sql index 29a84c5507b..50fdb6ffbba 100644 --- a/tests/queries/0_stateless/01778_mmap_cache_infra.sql +++ b/tests/queries/0_stateless/01778_mmap_cache_infra.sql @@ -1,3 +1,4 @@ +-- Tags: no-parallel -- We check the existence of queries and metrics and don't check the results (a smoke test). SYSTEM DROP MMAP CACHE; diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh index edffc0a3807..0dfde4997a7 100755 --- a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper +# Tags: long, zookeeper, no-parallel CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=error diff --git a/tests/queries/0_stateless/01927_query_views_log_current_database.sql b/tests/queries/0_stateless/01927_query_views_log_current_database.sql index ba42795333c..6287156daaf 100644 --- a/tests/queries/0_stateless/01927_query_views_log_current_database.sql +++ b/tests/queries/0_stateless/01927_query_views_log_current_database.sql @@ -16,6 +16,7 @@ CREATE MATERIALIZED VIEW matview_b_to_c TO table_c AS SELECT SUM(a + sleepEachRo CREATE MATERIALIZED VIEW matview_join_d_e TO table_f AS SELECT table_d.a as a, table_e.count + sleepEachRow(0.000003) as count FROM table_d LEFT JOIN table_e ON table_d.a = table_e.a; -- ENABLE LOGS +SET parallel_view_processing=0; SET log_query_views=1; SET log_queries_min_type='QUERY_FINISH'; SET log_queries=1; diff --git a/tests/queries/0_stateless/02044_url_glob_parallel_connection_refused.sh b/tests/queries/0_stateless/02044_url_glob_parallel_connection_refused.sh index 7e8579f7cbe..b4b0ee8a023 100755 --- a/tests/queries/0_stateless/02044_url_glob_parallel_connection_refused.sh +++ b/tests/queries/0_stateless/02044_url_glob_parallel_connection_refused.sh @@ -8,7 +8,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) i=0 retries=5 # Connecting to wrong address and checking for race condition +# http_max_tries is limited to 2 because with the default 10 retries the execution time might go as high as around 3 minutes (due to exponential back-off). +# Because of that, we might see wrong 'tests hung' reports depending on how close to the end of the test run this particular test was executed.
+# A proper fix should be implemented in https://github.com/ClickHouse/ClickHouse/issues/66656 while [[ $i -lt $retries ]]; do - timeout 5s ${CLICKHOUSE_CLIENT} --max_threads 10 --query "SELECT * FROM url('http://128.0.0.{1..10}:${CLICKHOUSE_PORT_HTTP}/?query=SELECT+sleep(1)', TSV, 'x UInt8')" --format Null 2>/dev/null + timeout 5s ${CLICKHOUSE_CLIENT} --max_threads 10 --http_max_tries 2 --query "SELECT * FROM url('http://128.0.0.{1..10}:${CLICKHOUSE_PORT_HTTP}/?query=SELECT+sleep(1)', TSV, 'x UInt8')" --format Null 2>/dev/null ((++i)) done diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 8f62eda9233..cfae4fee6c2 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -286,7 +286,6 @@ CREATE TABLE system.functions ( `name` String, `is_aggregate` UInt8, - `is_deterministic` Nullable(UInt8), `case_insensitive` UInt8, `alias_to` String, `create_query` String, diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference index e0cc8f0ce63..2d9f236ada9 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference @@ -1,8 +1,8 @@ -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent -18 18 9 18 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent -18 9 9 9 -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent -18 18 9 18 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data only with max_partitions_per_insert_block=0 +18 36 27 36 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data +18 18 18 18 +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data only with max_partitions_per_insert_block=0 +18 36 27 36 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data 18 18 18 18 diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql index fdd75b91b1f..465c8d6136c 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql @@ -1,6 +1,6 @@ -- Tags: long -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data only with max_partitions_per_insert_block=0'; drop table if exists test sync; drop table if exists test_mv_a sync; @@ -35,7 +35,7 @@ select (select sum(c) from
test_mv_c where test='case1'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data'; set deduplicate_blocks_in_dependent_materialized_views=1; @@ -53,7 +53,7 @@ select (select sum(c) from test_mv_c where test='case2'); -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data only with max_partitions_per_insert_block=0'; set deduplicate_blocks_in_dependent_materialized_views=0; @@ -70,7 +70,7 @@ select (select sum(c) from test_mv_b where test='case3'), (select sum(c) from test_mv_c where test='case3'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data'; set deduplicate_blocks_in_dependent_materialized_views=1; diff --git a/tests/queries/0_stateless/02125_many_mutations.reference b/tests/queries/0_stateless/02125_many_mutations.reference index c98d8221c7f..a94b8b78480 100644 --- a/tests/queries/0_stateless/02125_many_mutations.reference +++ b/tests/queries/0_stateless/02125_many_mutations.reference @@ -2,5 +2,5 @@ 1 1 20000 0 -0 20000 -1 20001 +0 20001 +1 20002 diff --git a/tests/queries/0_stateless/02125_many_mutations.sh b/tests/queries/0_stateless/02125_many_mutations.sh index 474fc8ef199..239f30811d4 100755 --- a/tests/queries/0_stateless/02125_many_mutations.sh +++ b/tests/queries/0_stateless/02125_many_mutations.sh @@ -48,6 +48,7 @@ $CLICKHOUSE_CLIENT --multiquery -q " select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; system start merges many_mutations; optimize table many_mutations final SETTINGS optimize_throw_if_noop = 1; +alter table many_mutations update y = y + 1 where 1 settings mutations_sync=2; select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; select x, y from many_mutations order by x; truncate table many_mutations; diff --git a/tests/queries/0_stateless/02125_many_mutations_2.sh b/tests/queries/0_stateless/02125_many_mutations_2.sh index b7585991738..958f50287b2 100755 --- a/tests/queries/0_stateless/02125_many_mutations_2.sh +++ b/tests/queries/0_stateless/02125_many_mutations_2.sh @@ -52,6 +52,7 @@ $CLICKHOUSE_CLIENT --multiquery -q " select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; system start merges many_mutations; optimize table many_mutations final SETTINGS optimize_throw_if_noop = 1; +alter table many_mutations update y = y + 1 where 1 settings mutations_sync=2; system flush logs; select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; select count() from many_mutations; diff --git a/tests/queries/0_stateless/02125_query_views_log.sql b/tests/queries/0_stateless/02125_query_views_log.sql index d2d19b76a1f..ba50902ebea 100644 --- a/tests/queries/0_stateless/02125_query_views_log.sql +++ b/tests/queries/0_stateless/02125_query_views_log.sql @@ -8,7 +8,7 @@ create table dst (key
Int) engine=Null(); create materialized view mv1 to dst as select * from src; create materialized view mv2 to dst as select * from src; -insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=1; +insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=0; system flush logs; -- { echo } diff --git a/tests/queries/0_stateless/02210_processors_profile_log.sql b/tests/queries/0_stateless/02210_processors_profile_log.sql index 59edbb71457..75e5bcbb585 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.sql +++ b/tests/queries/0_stateless/02210_processors_profile_log.sql @@ -14,10 +14,12 @@ SELECT multiIf( -- ExpressionTransform executes sleep(), -- so IProcessor::work() will spend 1 sec. - name = 'ExpressionTransform', elapsed_us>=1e6 ? 1 : elapsed_us, + -- We use two different timers to measure time: CLOCK_MONOTONIC for sleep and CLOCK_MONOTONIC_COARSE for profiling + -- that's why we cannot compare directly with 1,000,000 microseconds - let's compare with 900,000 microseconds. + name = 'ExpressionTransform', elapsed_us >= 0.9e6 ? 1 : elapsed_us, -- SourceFromSingleChunk, that feed data to ExpressionTransform, -- will feed first block and then wait in PortFull. - name = 'SourceFromSingleChunk', output_wait_elapsed_us>=1e6 ? 1 : output_wait_elapsed_us, + name = 'SourceFromSingleChunk', output_wait_elapsed_us >= 0.9e6 ? 1 : output_wait_elapsed_us, -- NullSource/LazyOutputFormatLazyOutputFormat are the outputs -- so they cannot starts to execute before sleep(1) will be executed. input_wait_elapsed_us>=1e6 ? 1 : input_wait_elapsed_us) diff --git a/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql b/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql index f1f7c876ba6..3d6b69fe161 100644 --- a/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql +++ b/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql @@ -29,7 +29,7 @@ from in_02231 group by key; set optimize_trivial_insert_select = 1; -insert into in_02231 select * from numbers(10e6) settings max_memory_usage='310Mi', max_threads=1; +insert into in_02231 select * from numbers(10e6) settings max_memory_usage='400Mi', max_threads=1; drop table buffer_02231; drop table out_02231; diff --git a/tests/queries/0_stateless/02240_asof_join_biginteger.reference b/tests/queries/0_stateless/02240_asof_join_biginteger.reference index cac55eec430..f7eb4d74375 100644 --- a/tests/queries/0_stateless/02240_asof_join_biginteger.reference +++ b/tests/queries/0_stateless/02240_asof_join_biginteger.reference @@ -2,3 +2,7 @@ 0 340282366920938463463374607431768211457 0 18446744073709551617 0 340282366920938463463374607431768211457 +0 18446744073709551617 +0 340282366920938463463374607431768211457 +0 18446744073709551617 +0 340282366920938463463374607431768211457 diff --git a/tests/queries/0_stateless/02240_asof_join_biginteger.sql b/tests/queries/0_stateless/02240_asof_join_biginteger.sql index 6dc5b00f116..a5c1faae4ea 100644 --- a/tests/queries/0_stateless/02240_asof_join_biginteger.sql +++ b/tests/queries/0_stateless/02240_asof_join_biginteger.sql @@ -3,3 +3,11 @@ select * from (select 0 as k, toInt256('340282366920938463463374607431768211457' select * from (select 0 as k, toUInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toUInt128('18446744073709551616') as v) t2 using(k, v); select * from (select 0 as k, toUInt256('340282366920938463463374607431768211457') as 
v) t1 asof join (select 0 as k, toUInt256('340282366920938463463374607431768211456') as v) t2 using(k, v); + +SET join_algorithm = 'full_sorting_merge'; + +select * from (select 0 as k, toInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toInt128('18446744073709551616') as v) t2 using(k, v); +select * from (select 0 as k, toInt256('340282366920938463463374607431768211457') as v) t1 asof join (select 0 as k, toInt256('340282366920938463463374607431768211456') as v) t2 using(k, v); + +select * from (select 0 as k, toUInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toUInt128('18446744073709551616') as v) t2 using(k, v); +select * from (select 0 as k, toUInt256('340282366920938463463374607431768211457') as v) t1 asof join (select 0 as k, toUInt256('340282366920938463463374607431768211456') as v) t2 using(k, v); diff --git a/tests/queries/0_stateless/02241_join_rocksdb_bs.reference b/tests/queries/0_stateless/02241_join_rocksdb_bs.reference index 8416a2991c1..4dff9ef38ef 100644 --- a/tests/queries/0_stateless/02241_join_rocksdb_bs.reference +++ b/tests/queries/0_stateless/02241_join_rocksdb_bs.reference @@ -10,59 +10,3 @@ 1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 diff --git a/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 b/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 index 6121db6d6a2..e5703f99d62 100644 --- a/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 +++ b/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 @@ -1,4 +1,4 @@ --- Tags: use-rocksdb, long +-- Tags: use-rocksdb, long, no-s3-storage SET join_algorithm = 'direct'; @@ -13,27 +13,21 @@ INSERT INTO rdb_{{ table_size }} SELECT (sipHash64(number) % {{ table_size }}) as key, ('val' || toString(key)) AS value FROM numbers_mt({{ table_size }}); -{% for block_size in [10, 11, 128, 129, 65505, 65506, 70000] -%} - -{% if block_size * 5000 > table_size -%} - -SET max_block_size = {{ block_size }}; - {% for right_size in [table_size // 2, table_size + table_size // 4 + 1] -%} SELECT count() == (SELECT count() FROM rdb_{{ table_size }} WHERE key < {{ right_size }}) FROM (SELECT number as k FROM numbers_mt({{ right_size }})) as t1 INNER JOIN rdb_{{ table_size }} as rdb -ON rdb.key == t1.k; +ON rdb.key == t1.k +{% if table_size < 100 %}SETTINGS max_block_size = 1{% endif -%} +; SELECT count() == {{ right_size }} and countIf(value != '') == (SELECT count() FROM rdb_{{ table_size }} WHERE key < {{ right_size }}) FROM (SELECT number as k FROM numbers_mt({{ right_size }})) as t1 LEFT JOIN rdb_{{ table_size }} as rdb -ON rdb.key == t1.k; - -{% endfor -%} - -{% endif -%} +ON rdb.key == t1.k +{% if table_size < 100 %}SETTINGS max_block_size = 1{% endif -%} +; {% endfor -%} {% endfor -%} @@ -41,4 +35,3 @@ ON rdb.key == t1.k; {% for table_size in [10, 65555, 100000] -%} DROP TABLE IF EXISTS rdb_{{ table_size }}; {% endfor -%} - diff --git a/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql b/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql index a4e60ff54dd..0b10101d8f2 100644 --- a/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql +++ b/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql @@ -19,8 +19,6 @@ SELECT * FROM t1 ANTI JOIN t2 ON t1.key = t2.key; -- { serverError NOT_IMPLEMENT SELECT * FROM t1 SEMI JOIN t2 ON t1.key = t2.key; -- { serverError NOT_IMPLEMENTED } -SELECT * FROM t1 ASOF JOIN t2 ON t1.key = 
t2.key AND t1.val > t2.val; -- { serverError NOT_IMPLEMENTED } - SELECT * FROM t1 ANY JOIN t2 ON t1.key = t2.key SETTINGS any_join_distinct_right_table_keys = 1; -- { serverError NOT_IMPLEMENTED } SELECT * FROM t1 JOIN t2 USING (key) SETTINGS join_use_nulls = 1; -- { serverError NOT_IMPLEMENTED } diff --git a/tests/queries/0_stateless/02370_analyzer_in_function.reference b/tests/queries/0_stateless/02370_analyzer_in_function.reference index fda174c0b7c..49a080c2616 100644 --- a/tests/queries/0_stateless/02370_analyzer_in_function.reference +++ b/tests/queries/0_stateless/02370_analyzer_in_function.reference @@ -12,3 +12,6 @@ 1 1 0 +1 +1 +1 diff --git a/tests/queries/0_stateless/02370_analyzer_in_function.sql b/tests/queries/0_stateless/02370_analyzer_in_function.sql index 7287c94deda..a6e4400e101 100644 --- a/tests/queries/0_stateless/02370_analyzer_in_function.sql +++ b/tests/queries/0_stateless/02370_analyzer_in_function.sql @@ -21,3 +21,7 @@ SELECT (1, 2) IN 1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT (1, 2) IN [1]; -- { serverError INCORRECT_ELEMENT_OF_SET } SELECT (1, 2) IN (((1, 2), (1, 2)), ((1, 2), (1, 2))); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT (1, 2) IN [((1, 2), (1, 2)), ((1, 2), (1, 2))]; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select (select 1) in (1); +select in(untuple(((1), (1)))); +select in(untuple(((select 1), (1)))); diff --git a/tests/queries/0_stateless/02378_analyzer_projection_names.reference b/tests/queries/0_stateless/02378_analyzer_projection_names.reference index fd5bc7d4ae8..532414f117c 100644 --- a/tests/queries/0_stateless/02378_analyzer_projection_names.reference +++ b/tests/queries/0_stateless/02378_analyzer_projection_names.reference @@ -443,6 +443,18 @@ SELECT '--'; -- DESCRIBE (WITH test_table_in_cte AS (SELECT id FROM test_table) SELECT id IN test_table_in_cte FROM test_table); in(id, test_table_in_cte) UInt8 +SELECT '--'; +-- +DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1) SELECT * +FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); +test_table_in_cte_1.c1 UInt8 +test_table_in_cte_2.c1 UInt8 +SELECT '--'; +-- +DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1 UNION ALL SELECT 1 AS c1) SELECT * +FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); +test_table_in_cte_1.c1 UInt8 +test_table_in_cte_2.c1 UInt8 SELECT 'Joins'; Joins DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2); diff --git a/tests/queries/0_stateless/02378_analyzer_projection_names.sql b/tests/queries/0_stateless/02378_analyzer_projection_names.sql index f5ac5f7476f..f41afe6a950 100644 --- a/tests/queries/0_stateless/02378_analyzer_projection_names.sql +++ b/tests/queries/0_stateless/02378_analyzer_projection_names.sql @@ -408,6 +408,16 @@ SELECT '--'; DESCRIBE (WITH test_table_in_cte AS (SELECT id FROM test_table) SELECT id IN test_table_in_cte FROM test_table); +SELECT '--'; + +DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1) SELECT * +FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); + +SELECT '--'; + +DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1 UNION ALL SELECT 1 AS c1) SELECT * +FROM test_table_in_cte_1 INNER JOIN 
test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); + SELECT 'Joins'; DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2); diff --git a/tests/queries/0_stateless/02417_load_marks_async.sh b/tests/queries/0_stateless/02417_load_marks_async.sh index 72b35a565df..950656e7ab6 100755 --- a/tests/queries/0_stateless/02417_load_marks_async.sh +++ b/tests/queries/0_stateless/02417_load_marks_async.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02440_mutations_finalization.sql b/tests/queries/0_stateless/02440_mutations_finalization.sql index c522d8ab9df..92ed6a327ad 100644 --- a/tests/queries/0_stateless/02440_mutations_finalization.sql +++ b/tests/queries/0_stateless/02440_mutations_finalization.sql @@ -1,3 +1,4 @@ +-- Tags: no-parallel create table mut (n int) engine=ReplicatedMergeTree('/test/02440/{database}/mut', '1') order by tuple(); set insert_keeper_fault_injection_probability=0; diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh index d72d771a150..6a47b7d8d61 100755 --- a/tests/queries/0_stateless/02443_detach_attach_partition.sh +++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh @@ -73,7 +73,7 @@ kill -TERM $PID_1 && kill -TERM $PID_2 && kill -TERM $PID_3 && kill -TERM $PID_4 wait $CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'threads finished'" -wait_for_queries_to_finish +wait_for_queries_to_finish 60 $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0" $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" diff --git a/tests/queries/0_stateless/02477_age_datetime64.reference b/tests/queries/0_stateless/02477_age_datetime64.reference index 3b4459dd26d..fb085f461c9 100644 --- a/tests/queries/0_stateless/02477_age_datetime64.reference +++ b/tests/queries/0_stateless/02477_age_datetime64.reference @@ -111,3 +111,8 @@ SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), ma 1 SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); 1 +-- UBsan bug #66638 +set session_timezone = 'UTC'; +SELECT age('second', toDateTime(1157339245694594829, 6, 'UTC'), toDate('2015-08-18')) + +-8973935999 diff --git a/tests/queries/0_stateless/02477_age_datetime64.sql b/tests/queries/0_stateless/02477_age_datetime64.sql index 1bed93991ca..b5fa4da8837 100644 --- a/tests/queries/0_stateless/02477_age_datetime64.sql +++ b/tests/queries/0_stateless/02477_age_datetime64.sql @@ -75,3 +75,7 @@ SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); + +-- UBsan bug #66638 +set session_timezone = 'UTC'; +SELECT age('second', toDateTime(1157339245694594829, 6, 'UTC'), toDate('2015-08-18')) diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.reference b/tests/queries/0_stateless/02530_dictionaries_update_field.reference index 4d5a7447a49..ada13443a10 100644 --- 
a/tests/queries/0_stateless/02530_dictionaries_update_field.reference +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.reference @@ -1,102 +1,42 @@ flat --- { echoOn } - INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now()); -SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First -INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 Second -INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); -INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; -SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 SecondUpdated 3 Third flat/custom --- { echoOn } - INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now()); -SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First -INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 Second -INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); -INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; -SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 SecondUpdated 3 Third hashed --- { echoOn } - INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now()); -SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First -INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 Second -INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); -INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; -SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 SecondUpdated 3 Third hashed/custom --- { echoOn } - INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now()); -SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First -INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 Second -INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); -INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; -SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated 3 Third complex_key_hashed --- { echoOn } - INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now()); -SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First -INSERT INTO 
table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 Second -INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); -INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; -SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 SecondUpdated 3 Third complex_key_hashed/custom --- { echoOn } - INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now()); -SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First -INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 Second -INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); -INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; -SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated 3 Third diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.sh b/tests/queries/0_stateless/02530_dictionaries_update_field.sh index 9ced78a1196..a8e5804fba4 100755 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.sh +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.sh @@ -35,7 +35,7 @@ for layout in "${layouts[@]}"; do echo "$layout" fi - $CLICKHOUSE_CLIENT -nm -q " + $CLICKHOUSE_CLIENT --multiquery " TRUNCATE TABLE table_for_update_field_dictionary; CREATE DICTIONARY $dictionary_name @@ -49,24 +49,31 @@ for layout in "${layouts[@]}"; do LAYOUT($layout()) LIFETIME(1); - -- { echoOn } - INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now()); - SELECT key, value FROM $dictionary_name ORDER BY key ASC; + INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now());" - INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); - SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; + while true + do + $CLICKHOUSE_CLIENT --query "SELECT key, value FROM $dictionary_name ORDER BY key ASC" | grep -A10 -B10 'First' && break; + sleep .1; + done - SELECT key, value FROM $dictionary_name ORDER BY key ASC; + $CLICKHOUSE_CLIENT --query "INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());" - INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); - INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); - SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; + while true + do + $CLICKHOUSE_CLIENT --query "SELECT key, value FROM $dictionary_name ORDER BY key ASC" | grep -A10 -B10 'Second' && break; + sleep .1; + done - SELECT key, value FROM $dictionary_name ORDER BY key ASC; - -- { echoOff } + $CLICKHOUSE_CLIENT --query "INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()), (3, 'Third', now())" - DROP DICTIONARY $dictionary_name; - " + while 
true + do + $CLICKHOUSE_CLIENT --query "SELECT key, value FROM $dictionary_name ORDER BY key ASC" | grep -A10 -B10 'SecondUpdated' && break; + sleep .1; + done + + $CLICKHOUSE_CLIENT --query "DROP DICTIONARY $dictionary_name" done done diff --git a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference index 63610604ddd..858fbe98838 100644 --- a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference +++ b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference @@ -264,3 +264,44 @@ SETTINGS group_by_use_nulls = 1, max_bytes_before_external_sort=10; 9 \N 9 \N 0 20 \N 1 25 +CREATE TABLE test +ENGINE = ReplacingMergeTree +PRIMARY KEY id +AS SELECT number AS id FROM numbers(100); +SELECT id +FROM test +GROUP BY id + WITH CUBE +HAVING id IN ( + SELECT id + FROM test +) +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls = true; +SELECT id +FROM test +FINAL +GROUP BY id + WITH CUBE +HAVING id IN ( + SELECT DISTINCT id + FROM test + FINAL +) +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls = true; +SELECT id +FROM test +FINAL +GROUP BY + GROUPING SETS ((id)) +ORDER BY + id IN ( + SELECT DISTINCT id + FROM test + FINAL + LIMIT 4 + ) ASC +LIMIT 256 BY id +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls=true; diff --git a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql index a4d4f2f8bc9..4ae5df9629a 100644 --- a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql +++ b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql @@ -83,3 +83,48 @@ GROUP BY ) ORDER BY 1, tuple(val) SETTINGS group_by_use_nulls = 1, max_bytes_before_external_sort=10; + +CREATE TABLE test +ENGINE = ReplacingMergeTree +PRIMARY KEY id +AS SELECT number AS id FROM numbers(100); + +SELECT id +FROM test +GROUP BY id + WITH CUBE +HAVING id IN ( + SELECT id + FROM test +) +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls = true; + +SELECT id +FROM test +FINAL +GROUP BY id + WITH CUBE +HAVING id IN ( + SELECT DISTINCT id + FROM test + FINAL +) +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls = true; + +SELECT id +FROM test +FINAL +GROUP BY + GROUPING SETS ((id)) +ORDER BY + id IN ( + SELECT DISTINCT id + FROM test + FINAL + LIMIT 4 + ) ASC +LIMIT 256 BY id +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls=true; diff --git a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference index d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference +++ b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference @@ -1 +1,2 @@ 1 +1 diff --git a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql index 13dfb5debe7..6aa70a379c1 100644 --- a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql +++ b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql @@ -6,3 +6,15 @@ ASOF LEFT JOIN ( select 1 as session_id, 4 as id ) as visitors ON visitors.session_id <= sessions.id AND arrayFirst(a -> a, arrayMap((a) -> a, sessions.arr)) = 
visitors.id +; + +select count(*) +from ( + select 1 as id, [1, 2, 3] as arr +) as sessions +ASOF LEFT JOIN ( + select 1 as session_id, 4 as id +) as visitors +ON visitors.session_id <= sessions.id AND arrayFirst(a -> a, arrayMap((a) -> a, sessions.arr)) = visitors.id +SETTINGS join_algorithm = 'full_sorting_merge' +; diff --git a/tests/queries/0_stateless/02724_limit_num_mutations.sh b/tests/queries/0_stateless/02724_limit_num_mutations.sh index a9d69b2ed48..60888db0e2e 100755 --- a/tests/queries/0_stateless/02724_limit_num_mutations.sh +++ b/tests/queries/0_stateless/02724_limit_num_mutations.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference index e41f2521f5c..ac0461fc506 100644 --- a/tests/queries/0_stateless/02724_show_indexes.reference +++ b/tests/queries/0_stateless/02724_show_indexes.reference @@ -2,33 +2,33 @@ tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- EXTENDED tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- WHERE --- Check with weird table names @@ -40,8 +40,8 @@ NULL 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e -tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES -tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 a A 0 \N \N \N PRIMARY YES tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- Equally named table in other database tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b diff --git a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql 
b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql index 1ae24e4f3da..e393d96fc40 100644 --- a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql +++ b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql @@ -6,7 +6,8 @@ CREATE TABLE t_async_inserts_flush (a UInt64) ENGINE = Memory; SET async_insert = 1; SET wait_for_async_insert = 0; -SET async_insert_busy_timeout_min_ms = 1000000; +-- Disable adaptive timeout to prevent immediate push of the first message (if the queue last push was old) +SET async_insert_use_adaptive_busy_timeout=0; SET async_insert_busy_timeout_max_ms = 10000000; INSERT INTO t_async_inserts_flush VALUES (1) (2); diff --git a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh index e4a1de9a2ec..751b2798243 100755 --- a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh +++ b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh @@ -18,10 +18,7 @@ query="SELECT a, b FROM test_s3" query_id=$(${CLICKHOUSE_CLIENT} --query "select queryID() from ($query) limit 1" 2>&1) ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" ${CLICKHOUSE_CLIENT} -nm --query " -WITH - ProfileEvents['DiskConnectionsReset'] AS reset, - ProfileEvents['DiskConnectionsPreserved'] AS preserved -SELECT preserved > reset +SELECT ProfileEvents['DiskConnectionsPreserved'] > 0 FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() diff --git a/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql b/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql deleted file mode 100644 index 0b7337d1255..00000000000 --- a/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql +++ /dev/null @@ -1,3 +0,0 @@ -select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=0, use_hedged_requests=1 format Null; -select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=1, use_hedged_requests=0 format Null; -select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=0, use_hedged_requests=0 format Null; diff --git a/tests/queries/0_stateless/02815_no_throw_in_simple_queries.sh b/tests/queries/0_stateless/02815_no_throw_in_simple_queries.sh index 50f5e565088..68c55f9b66a 100755 --- a/tests/queries/0_stateless/02815_no_throw_in_simple_queries.sh +++ b/tests/queries/0_stateless/02815_no_throw_in_simple_queries.sh @@ -45,9 +45,4 @@ expect eof EOF } -run "$CLICKHOUSE_LOCAL --disable_suggestion" -# Suggestions are off because the suggestion feature initializes itself by reading all available function -# names from "system.functions". Getting the value for field "is_obsolete" occasionally throws (e.g. for -# certain dictionary functions when dictionaries are not set up yet). Exceptions are properly handled, but -# they exist for a short time. This, in combination with CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION, terminates -# clickhouse-local and clickhouse-client when run in interactive mode *with* suggestions. 
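Editor's note on the async-insert hunks above: rather than raising async_insert_busy_timeout_min_ms, the test now disables the adaptive busy timeout so the very first insert is not flushed immediately. A minimal sketch of that pattern, not part of the patch (the table definition and setting names are taken from the hunk above; the explicit flush statement is an assumption):

${CLICKHOUSE_CLIENT} --multiquery "
    DROP TABLE IF EXISTS t_async_inserts_flush;
    CREATE TABLE t_async_inserts_flush (a UInt64) ENGINE = Memory;
    SET async_insert = 1;
    SET wait_for_async_insert = 0;
    SET async_insert_use_adaptive_busy_timeout = 0;  -- keep the first insert queued instead of pushing it at once
    SET async_insert_busy_timeout_max_ms = 10000000; -- the periodic flush effectively never fires during the test
    INSERT INTO t_async_inserts_flush VALUES (1) (2);
    -- rows stay in system.asynchronous_inserts until flushed explicitly (assumed statement):
    SYSTEM FLUSH ASYNC INSERT QUEUE;
    SELECT count() FROM t_async_inserts_flush;
"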
+run "$CLICKHOUSE_LOCAL" diff --git a/tests/queries/0_stateless/02834_apache_arrow_abort.sql b/tests/queries/0_stateless/02834_apache_arrow_abort.sql index 47e1c5d3951..cb0eaebe554 100644 --- a/tests/queries/0_stateless/02834_apache_arrow_abort.sql +++ b/tests/queries/0_stateless/02834_apache_arrow_abort.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-tsan, no-asan, no-msan, no-ubsan -- This tests depends on internet access, but it does not matter, because it only has to check that there is no abort due to a bug in Apache Arrow library. SET optimize_trivial_insert_select=1; INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION } diff --git a/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.reference b/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.reference index ba63f2f7e9c..a9eae234dba 100644 --- a/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.reference +++ b/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.reference @@ -1,3 +1,3 @@ Parquet -e76a749f346078a6a43e0cbd25f0d18a - +3249508141921544766 400 diff --git a/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.sh b/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.sh index 83196458a84..c96531ffea9 100755 --- a/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.sh +++ b/tests/queries/0_stateless/02874_parquet_multiple_batches_array_inconsistent_offsets.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-ubsan, no-fasttest +# Tags: long, no-ubsan, no-fasttest, no-parallel, no-asan, no-msan, no-tsan +# This test requires around 10 GB of memory and it is just too much. 
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -121,9 +122,12 @@ echo "Parquet" #} DATA_FILE=$CUR_DIR/data_parquet/string_int_list_inconsistent_offset_multiple_batches.parquet -${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (ints Array(Int64), strings Nullable(String)) ENGINE = Memory" -cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" | md5sum -${CLICKHOUSE_CLIENT} --query="SELECT count() FROM parquet_load" -${CLICKHOUSE_CLIENT} --query="drop table parquet_load" \ No newline at end of file + +${CLICKHOUSE_LOCAL} --multiquery " +DROP TABLE IF EXISTS parquet_load; +CREATE TABLE parquet_load (ints Array(Int64), strings Nullable(String)) ENGINE = Memory; +INSERT INTO parquet_load FROM INFILE '$DATA_FILE'; +SELECT sum(cityHash64(*)) FROM parquet_load; +SELECT count() FROM parquet_load; +DROP TABLE parquet_load; +" diff --git a/tests/queries/0_stateless/02884_async_insert_skip_settings.reference b/tests/queries/0_stateless/02884_async_insert_skip_settings.reference index 318966ce93b..957be52c59b 100644 --- a/tests/queries/0_stateless/02884_async_insert_skip_settings.reference +++ b/tests/queries/0_stateless/02884_async_insert_skip_settings.reference @@ -1,5 +1,5 @@ -4 +pending to flush 4 1 1 2 -1 +flush queries 1 diff --git a/tests/queries/0_stateless/02884_async_insert_skip_settings.sql b/tests/queries/0_stateless/02884_async_insert_skip_settings.sql index 9bc689fb4ec..df715af873a 100644 --- a/tests/queries/0_stateless/02884_async_insert_skip_settings.sql +++ b/tests/queries/0_stateless/02884_async_insert_skip_settings.sql @@ -9,7 +9,8 @@ ORDER BY id; SET async_insert = 1; SET async_insert_deduplicate = 1; SET wait_for_async_insert = 0; -SET async_insert_busy_timeout_min_ms = 100000; +-- Disable adaptive timeout to prevent immediate push of the first message (if the queue last push was old) +SET async_insert_use_adaptive_busy_timeout=0; SET async_insert_busy_timeout_max_ms = 1000000; SET insert_deduplication_token = '1'; @@ -30,7 +31,7 @@ INSERT INTO t_async_insert_skip_settings VALUES (2); SYSTEM FLUSH LOGS; -SELECT length(entries.bytes) FROM system.asynchronous_inserts +SELECT 'pending to flush', length(entries.bytes) FROM system.asynchronous_inserts WHERE database = currentDatabase() AND table = 't_async_insert_skip_settings' ORDER BY first_update; @@ -40,7 +41,7 @@ SELECT * FROM t_async_insert_skip_settings ORDER BY id; SYSTEM FLUSH LOGS; -SELECT uniqExact(flush_query_id) FROM system.asynchronous_insert_log +SELECT 'flush queries', uniqExact(flush_query_id) FROM system.asynchronous_insert_log WHERE database = currentDatabase() AND table = 't_async_insert_skip_settings'; DROP TABLE t_async_insert_skip_settings SYNC; diff --git a/tests/queries/0_stateless/02890_named_tuple_functions.reference b/tests/queries/0_stateless/02890_named_tuple_functions.reference index f7a0c440b5a..6b36ff2c54c 100644 --- a/tests/queries/0_stateless/02890_named_tuple_functions.reference +++ b/tests/queries/0_stateless/02890_named_tuple_functions.reference @@ -7,3 +7,4 @@ Tuple(\n k UInt8,\n j Int32) Tuple(Int32, Int32, Int32, Int32) ['1','2','3','4'] (1,2,3) +Tuple(Nullable(Nothing)) Tuple(Bool) Tuple(Bool) diff --git a/tests/queries/0_stateless/02890_named_tuple_functions.sql b/tests/queries/0_stateless/02890_named_tuple_functions.sql index 8e0c9c2b10e..0033102bd53 100644 --- 
a/tests/queries/0_stateless/02890_named_tuple_functions.sql +++ b/tests/queries/0_stateless/02890_named_tuple_functions.sql @@ -28,4 +28,7 @@ create table tbl (x Tuple(a Int32, b Int32, c Int32)) engine MergeTree order by insert into tbl values (tuple(1, 2, 3)); -- without tuple it's interpreted differently inside values block. select * from tbl; -drop table tbl +drop table tbl; + +-- Avoid generating named tuple for special keywords +select toTypeName(tuple(null)), toTypeName(tuple(true)), toTypeName(tuple(false)); diff --git a/tests/queries/0_stateless/02892_orc_filter_pushdown.reference b/tests/queries/0_stateless/02892_orc_filter_pushdown.reference index 903d42bf492..0be6d13adb3 100644 --- a/tests/queries/0_stateless/02892_orc_filter_pushdown.reference +++ b/tests/queries/0_stateless/02892_orc_filter_pushdown.reference @@ -205,7 +205,7 @@ select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative 600 419700 select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where (negative_or_null < -500); 596 -1099 -501 -select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null is null); +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null is null) settings allow_experimental_analyzer=1; 0 0 select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where (negative_or_null is null); 0 0 0 diff --git a/tests/queries/0_stateless/02892_orc_filter_pushdown.sql b/tests/queries/0_stateless/02892_orc_filter_pushdown.sql index e3736de6a17..29055eb911a 100644 --- a/tests/queries/0_stateless/02892_orc_filter_pushdown.sql +++ b/tests/queries/0_stateless/02892_orc_filter_pushdown.sql @@ -206,7 +206,7 @@ select count(), min(nEgAtIvE_oR_nUlL), max(nEgAtIvE_oR_nUlL) from file('02892.or select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null < -500); select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where (negative_or_null < -500); -select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null is null); +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null is null) settings allow_experimental_analyzer=1; select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where (negative_or_null is null); select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null in (0, -1, -10, -100, -1000)); diff --git a/tests/queries/0_stateless/02903_parameterized_view_explain_ast.reference b/tests/queries/0_stateless/02903_parameterized_view_explain_ast.reference index 6ee8d0c3d23..27222968b1a 100644 --- a/tests/queries/0_stateless/02903_parameterized_view_explain_ast.reference +++ b/tests/queries/0_stateless/02903_parameterized_view_explain_ast.reference @@ -1,4 +1,4 @@ -CreateQuery numbers_pv (children 2) +CreateQuery numbers_pv (children 2) Identifier numbers_pv SelectWithUnionQuery (children 1) ExpressionList (children 1) diff --git 
a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference index 4eb7e74446d..31a1cda18e7 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference @@ -35,6 +35,10 @@ SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS N 2 2 2 2 3 3 3 33 \N \N \N \N +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; +2 2 2 2 +3 3 3 33 +\N \N \N \N -- aliases defined in the join condition are valid -- FIXME(@vdimir) broken query formatting for the following queries: -- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index f7813e2a1b4..f739259caf9 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -34,6 +34,7 @@ SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) A SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; -- aliases defined in the join condition are valid -- FIXME(@vdimir) broken query formatting for the following queries: diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference index 335b55f05c8..07deb7c2565 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference @@ -10,13 +10,14 @@ 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 42 +2022-09-01 12:00:00 84 +2023-09-01 12:00:00 42 -- Original issue with deduplicate_blocks_in_dependent_materialized_views = 1 AND max_insert_delayed_streams_for_parallel_write > 1 -- Landing 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 42 +2022-09-01 12:00:00 84 2023-09-01 12:00:00 42 -- Regression introduced in https://github.com/ClickHouse/ClickHouse/pull/54184 -- Landing (Agg/Replacing)MergeTree diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index f206f0d7775..a2378fd8f67 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -54,8 +54,9 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view - 1st insert works for landing and mv tables - 2nd insert gets first block 20220901 deduplicated and second one inserted in landing table - - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded + - 2nd insert is not inserting anything in mv table due to a bug 
computing blocks to be discarded, now that block is inserted because deduplicate_blocks_in_dependent_materialized_views=0 + Now it is fixed. */ SET deduplicate_blocks_in_dependent_materialized_views = 0, max_insert_delayed_streams_for_parallel_write = 1000; @@ -97,7 +98,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view This is what happens now: - 1st insert works for landing and mv tables - - 2nd insert gets first block 20220901 deduplicated and second one inserted for landing and mv tables + - 2nd insert gets first block 20220901 deduplicated for landing and both rows are inserted for mv tables */ SET deduplicate_blocks_in_dependent_materialized_views = 1, max_insert_delayed_streams_for_parallel_write = 1000; diff --git a/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql index b9bc6974c47..c36b5bebd58 100644 --- a/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql +++ b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql @@ -6,6 +6,9 @@ INSERT INTO 02918_parallel_replicas SELECT toString(number), number % 4 FROM num SET prefer_localhost_replica=0; +--- if we try to query unavaialble replica, connection will be retried +--- but a warning log message will be printed out +SET send_logs_level='error'; -- { echoOn } SELECT y, count() FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) @@ -26,5 +29,6 @@ GROUP BY y ORDER BY y SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; -- { echoOff } +SET send_logs_level='warning'; DROP TABLE 02918_parallel_replicas; diff --git a/tests/queries/0_stateless/02944_variant_as_common_type.sql b/tests/queries/0_stateless/02944_variant_as_common_type.sql index 49ea5f2769c..b3b86427b06 100644 --- a/tests/queries/0_stateless/02944_variant_as_common_type.sql +++ b/tests/queries/0_stateless/02944_variant_as_common_type.sql @@ -71,4 +71,3 @@ select toTypeName(res), array([1, 2, 3], [[1, 2, 3]]) as res; select toTypeName(res), map('a', 1, 'b', 'str_1') as res; select toTypeName(res), map('a', 1, 'b', map('c', 2, 'd', 'str_1')) as res; select toTypeName(res), map('a', 1, 'b', [1, 2, 3], 'c', [[4, 5, 6]]) as res; - diff --git a/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql b/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql index 248475ab84b..24425062116 100644 --- a/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql +++ b/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.sql @@ -1,3 +1,6 @@ +-- Tags: no-random-merge-tree-settings, no-random-settings +-- Because we compare part sizes, and they could be affected by index granularity and index compression settings. + CREATE TABLE part_log_bytes_uncompressed ( key UInt8, value UInt8 @@ -17,7 +20,8 @@ ALTER TABLE part_log_bytes_uncompressed DROP PART 'all_4_4_0' SETTINGS mutations SYSTEM FLUSH LOGS; -SELECT event_type, table, part_name, bytes_uncompressed > 0, size_in_bytes < bytes_uncompressed FROM system.part_log +SELECT event_type, table, part_name, bytes_uncompressed > 0, (bytes_uncompressed > 0 ? (size_in_bytes < bytes_uncompressed ? 
'1' : toString((size_in_bytes, bytes_uncompressed))) : '0') +FROM system.part_log WHERE event_date >= yesterday() AND database = currentDatabase() AND table = 'part_log_bytes_uncompressed' AND (event_type != 'RemovePart' OR part_name = 'all_4_4_0') -- ignore removal of other parts ORDER BY part_name, event_type; diff --git a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.reference b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.reference index 74c71827e6e..1f140df1d6b 100644 --- a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.reference +++ b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.reference @@ -3,7 +3,7 @@ 1000 1 1000 -2 +1 1000000 1000 0 999001 diff --git a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh index f7111d0afe2..95c136584f0 100755 --- a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh +++ b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh @@ -29,7 +29,7 @@ ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm;" ${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE rocksdb_worm;" # Must set both max_threads and max_insert_threads to 2 to make sure there is only two sinks ${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers_mt(1000000) SETTINGS max_threads = 2, max_insert_threads = 2, max_block_size = 10000, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, insert_deduplication_token = '', optimize_trivial_insert_select = 1;" -${CLICKHOUSE_CLIENT} --query "SELECT sum(value) FROM system.rocksdb WHERE database = currentDatabase() AND table = 'rocksdb_worm' AND name = 'no.file.opens';" # should be 2 because default bulk sink size is ~1M rows / SST file +${CLICKHOUSE_CLIENT} --query "SELECT sum(value) IN (1, 2) FROM system.rocksdb WHERE database = currentDatabase() AND table = 'rocksdb_worm' AND name = 'no.file.opens';" # should be not more than 2 because default bulk sink size is ~1M rows / SST file. 
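Editor's note on the rocksdb_worm hunk above: the check is relaxed from an exact file counter to a bounded one, so the reference value stays 1 whether the bulk sink produces one or two SST files. A minimal sketch of that assertion style, not part of the patch (table and counter names are taken from the hunk above):

# prints 1 when the counter is within the accepted range, instead of echoing the raw value
${CLICKHOUSE_CLIENT} --query "
    SELECT sum(value) IN (1, 2)
    FROM system.rocksdb
    WHERE database = currentDatabase()
      AND table = 'rocksdb_worm'
      AND name = 'no.file.opens'"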
${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm;" # Testing insert with duplicated keys diff --git a/tests/queries/0_stateless/02970_generate_series.reference b/tests/queries/0_stateless/02970_generate_series.reference index 6e6f3c81587..f8a8e1891b7 100644 --- a/tests/queries/0_stateless/02970_generate_series.reference +++ b/tests/queries/0_stateless/02970_generate_series.reference @@ -5,6 +5,7 @@ 501 50 17928 +17928 0 10 13 diff --git a/tests/queries/0_stateless/02970_generate_series.sql b/tests/queries/0_stateless/02970_generate_series.sql index 0844714b3a6..edae884a561 100644 --- a/tests/queries/0_stateless/02970_generate_series.sql +++ b/tests/queries/0_stateless/02970_generate_series.sql @@ -5,6 +5,7 @@ SELECT count() FROM generate_series(7, 77, 10); SELECT count() FROM generate_series(0, 1000, 2); SELECT count() FROM generate_series(0, 999, 20); SELECT sum(generate_series) FROM generate_series(4, 1008, 4) WHERE generate_series % 7 = 1; +SELECT sum(generate_series) FROM generate_series(4, 1008, 4) WHERE generate_series % 7 = 1 SETTINGS max_block_size = 71; SELECT * FROM generate_series(5, 4); SELECT * FROM generate_series(0, 0); diff --git a/tests/queries/0_stateless/02992_analyzer_group_by_const.sql b/tests/queries/0_stateless/02992_analyzer_group_by_const.sql index ede6e0deed9..2a9e673d7bc 100644 --- a/tests/queries/0_stateless/02992_analyzer_group_by_const.sql +++ b/tests/queries/0_stateless/02992_analyzer_group_by_const.sql @@ -30,3 +30,46 @@ SELECT min(dummy) FROM remote('127.0.0.{2,3}', system.one) GROUP BY y; + +CREATE TABLE ttt (hr DateTime, ts DateTime) ENGINE=Memory +as select '2000-01-01' d, d; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{1,2}', currentDatabase(), ttt) +GROUP BY c1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{3,2}', currentDatabase(), ttt) +GROUP BY c1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{1,2}', currentDatabase(), ttt) +GROUP BY c1 + 1 FORMAT Null; + +SELECT + count(), + now() AS c1 +FROM remote('127.0.0.{3,2}', currentDatabase(), ttt) +GROUP BY c1 + 1 FORMAT Null; + +SELECT + count(), + tuple(nullIf(toDateTime(formatDateTime(hr, '%F %T', 'America/Los_Angeles'), 'America/Los_Angeles'), toDateTime(0))) as c1, + defaultValueOfArgumentType(toTimeZone(ts, 'America/Los_Angeles')) as c2, + formatDateTime(hr, '%F %T', 'America/Los_Angeles') as c3 +FROM remote('127.0.0.{1,2}', currentDatabase(), ttt) +GROUP BY c1, c2, c3 FORMAT Null; + +SELECT + count(), + tuple(nullIf(toDateTime(formatDateTime(hr, '%F %T', 'America/Los_Angeles'), 'America/Los_Angeles'), toDateTime(0))) as c1, + defaultValueOfArgumentType(toTimeZone(ts, 'America/Los_Angeles')) as c2, + formatDateTime(hr, '%F %T', 'America/Los_Angeles') as c3 +FROM remote('127.0.0.{3,2}', currentDatabase(), ttt) +GROUP BY c1, c2, c3 FORMAT Null; diff --git a/tests/queries/0_stateless/02995_index.reference b/tests/queries/0_stateless/02995_index.reference deleted file mode 100644 index 1e8639caa88..00000000000 --- a/tests/queries/0_stateless/02995_index.reference +++ /dev/null @@ -1,126 +0,0 @@ -12 4 21722 2209341 4 1415 2333 4 61 64 3 -21 1134 11363 58484 1106 1458 1592 136 26 62 32 -22 210 4504 5729 196 291 767 124 47 54 8 -26 196 1327684 5221 195 4140 5661 161 28 49 19 -28 5 2034378 7102 5 325 3255 2 53 60 4 -29 53 45041 45189 45 1580 211 31 55 84 18 -38 424 1600675 4653 424 562 5944 244 60 65 6 -45 17 62743 674873 17 6239 6494 17 65 76 8 -72 1862 1210073 6200 1677 2498 528 859 51 61 11 -79 2 2255228 2255293 2 5495 7057 2 65 65 
1 -85 459 1051571 1829638 459 6402 7131 334 32 61 25 -86 10 1748130 1754217 10 4374 7003 10 56 59 4 -91 165 5718 5802 75 282 7113 112 41 63 22 -94 20 1231916 2050003 20 4802 4917 19 53 59 7 -99 2 3665 36667 2 497 697 2 70 71 2 -103 1 2446615 2446615 1 2498 2498 1 58 58 1 -106 72 6149 6699 67 527 826 40 61 61 1 -111 43 2273186 5272 43 492 4923 4 54 72 15 -120 3129 45117 6735 2868 1030 1625 561 59 64 6 -138 2 49243 49374 2 1428 1519 2 47 48 2 -143 100 23321 63639 100 1115 1624 88 51 51 1 -145 1 2447976 2447976 1 6173 6173 1 44 44 1 -153 16 13748 16881 16 1506 1636 16 54 68 9 -159 19952 1525336 7131 12957 1280 6163 2668 24 66 39 -171 5 15042 16698 5 1302 1608 5 65 65 1 -179 6264 1362341 2686 6244 2554 7132 2705 61 67 7 -192 1 1639623 1639623 1 3406 3406 1 32 32 1 -193 1 1429969 1429969 1 7131 7131 1 45 45 1 -207 12 23057 32500 12 1491 1726 12 32 46 7 -221 5081 1366870 6649 3432 4527 5226 687 24 69 39 -228 73 12281 17929 71 1328 2034 63 49 71 18 -229 2 1617478 1723791 2 4590 5578 2 41 42 2 -230 3916 1332729 6949 3668 1330 4703 845 62 65 4 -238 25 2624456 2625673 24 2535 6465 25 58 75 14 -241 154 2554929 2616444 154 2626 7131 148 34 57 17 -248 276 15529 30987 274 1040 1222 136 37 79 27 -254 3018 33966 6635 2837 1057 1622 539 24 60 33 -255 20 1581774 1811334 20 6068 6301 18 33 57 10 -256 5 5145 6841 5 367 376 5 58 58 1 -270 2 2195579 2262119 2 7102 7123 2 33 34 2 -281 32 2379460 616215 32 6042 6086 23 53 64 12 -282 7 1292651 24244 7 1607 2455 6 46 55 5 -286 123 1521935 5269 123 3793 3940 81 40 66 22 -291 21 2419080 3567 21 297 4731 21 54 55 2 -316 4 5221 5616 4 505 558 4 32 35 3 -319 232 56480 63033 230 1599 313 50 33 64 26 -327 15 51647 51894 14 1292 1585 14 47 57 7 -332 24 23484 54948 24 1609 1726 16 32 49 11 -333 1 14189 14189 1 1550 1550 1 63 63 1 -342 49 2579220 2622432 49 4626 6933 48 34 54 14 -344 1 6486 6486 1 509 509 1 24 24 1 -346 1987 53016 6735 1823 1334 174 294 26 62 32 -358 45 59058 60844 44 6746 722 40 57 84 15 -363 1198 1260033 2568811 1196 5710 5790 82 55 80 26 -384 150 2361175 476024 150 7008 7123 81 38 64 22 -387 277 5200 6553 252 243 521 130 65 65 1 -392 1877 1607428 2030850 1875 1416 7131 1379 54 66 13 -396 8181 1380803 6186 7920 545 798 1743 24 67 39 -398 3 5183 5213 2 291 352 3 53 59 3 -399 62 51494 59203 61 7073 754 42 55 78 18 -412 2141 1360120 2189792 2136 2491 5658 1371 71 75 5 -413 2 2036037 2064917 2 3963 4666 2 43 45 2 -431 33 2302331 2348449 33 4425 6516 32 69 69 1 -447 59 25125 33094 59 1176 1817 56 53 58 6 -456 1 53157 53157 1 1556 1556 1 26 26 1 -462 5 5456 6280 5 348 4337 5 28 40 5 -472 1 1443716 1443716 1 6122 6122 1 42 42 1 -491 34 1066102 1183673 34 6606 6822 32 46 67 15 -498 896 2230163 3054 895 537 7131 714 24 59 28 -504 108 12281 25180 108 1318 1784 94 55 66 12 -515 22 1588883 2640809 22 6554 6571 15 46 59 12 -518 1 37743 37743 1 1558 1558 1 72 72 1 -530 1 3033 3033 1 561 561 1 59 59 1 -532 26 5721 6355 25 549 665 14 44 50 7 -546 156 2577874 48517 156 1105 324 133 44 51 8 -554 12 1665194 2640066 12 1817 2951 12 57 57 1 -564 3865 2028049 2083433 3722 1115 985 2203 44 84 41 -566 4432 50605 57509 3217 1191 267 459 26 72 39 -567 8 5221 5893 7 333 558 8 27 35 4 -582 1172 1320619 2019743 1172 5819 7131 757 26 63 30 -584 43100 2500 5594 22561 134 4573 1660 48 84 37 -589 28 6046 6068 19 345 564 27 55 62 8 -595 139 1585165 1683606 138 2231 3598 132 54 84 28 -615 3 1056081 1116230 3 5794 5796 2 59 62 3 -619 7 1543114 5241 7 2442 3105 7 41 45 3 -634 2722 1221058 4999 2686 2426 7131 1735 54 60 7 -635 237 2119333 4667 237 561 5999 176 49 60 12 -644 5 1774169 2056171 5 
5591 6091 4 33 39 3 -647 8 51632 64403 8 1457 1624 8 26 34 5 -651 1325 1620565 6281 1301 528 792 815 62 63 2 -665 13 4598 4789 13 511 558 11 39 46 7 -679 1560 1613200 25940 1552 1569 3118 781 49 84 35 -704 2 14226 15594 2 1086 1116 2 65 71 2 -715 25 1199352 3490 25 5036 5112 23 34 55 13 -716 1253 61989 6735 1050 1203 1625 397 52 65 14 -730 2584 5560 6170 634 2421 627 293 56 69 14 -736 8 1433153 4941 8 339 4594 8 28 36 5 -749 2 1326176 1339862 2 4339 6213 2 49 50 2 -753 1 53157 53157 1 1556 1556 1 26 26 1 -761 63 1443230 6881 63 3154 3204 26 56 73 14 -762 49 1449596 1968154 49 2437 3753 48 54 62 9 -775 35107 5330 769436 2471 447 6607 656 70 81 12 -789 1 1552458 1552458 1 2441 2441 1 62 62 1 -794 158 5585 6585 155 495 929 67 24 50 20 -839 9 29223 46530 9 1336 1465 9 52 52 1 -844 5 2377545 2377635 5 5129 6321 5 53 69 5 -846 50 2172273 2589295 50 1582 3053 48 64 68 5 -847 2577 56656 63658 1582 1444 838 474 26 63 33 -861 1333 5570 6909 839 457 489 37 33 70 34 -873 2360 1519811 50487 2248 1310 1784 316 60 68 9 -879 228 6704 6785 79 279 507 121 35 66 24 -889 5130 2070007 39692 5040 1151 6791 2606 44 66 23 -896 4 511246 859452 4 6554 6561 4 67 71 4 -912 146 1322641 2238040 146 1366 6354 143 59 59 1 -913 82 5495 6870 78 350 565 67 24 43 15 -921 763 1580790 416881 763 6191 7131 509 63 64 2 -925 318 2500952 5025 309 476 6114 182 32 56 21 -931 12 4277 4809 12 238 256 9 63 83 9 -942 954 1331 2228193 952 1121 5047 788 65 70 6 -948 14 1785593 2600431 14 6550 6598 13 34 49 9 -956 5 5755 6023 5 359 411 5 43 48 4 -963 4 3812 3835 4 444 537 4 47 53 4 -978 5 51632 58212 5 1127 1556 5 24 32 5 -980 53 47201 59744 53 1537 1625 36 41 49 9 -987 6033 2020131 763444 4306 256 792 1832 60 64 5 -993 4 1615159 1718339 4 1570 3093 4 62 63 2 diff --git a/tests/queries/0_stateless/02995_index_1.reference b/tests/queries/0_stateless/02995_index_1.reference new file mode 100644 index 00000000000..6c3b1230db6 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_1.reference @@ -0,0 +1,15 @@ +12 4 21722 2209341 4 1415 2333 4 61 64 3 +21 1134 11363 58484 1106 1458 1592 136 26 62 32 +22 210 4504 5729 196 291 767 124 47 54 8 +26 196 1327684 5221 195 4140 5661 161 28 49 19 +28 5 2034378 7102 5 325 3255 2 53 60 4 +29 53 45041 45189 45 1580 211 31 55 84 18 +38 424 1600675 4653 424 562 5944 244 60 65 6 +45 17 62743 674873 17 6239 6494 17 65 76 8 +72 1862 1210073 6200 1677 2498 528 859 51 61 11 +79 2 2255228 2255293 2 5495 7057 2 65 65 1 +85 459 1051571 1829638 459 6402 7131 334 32 61 25 +86 10 1748130 1754217 10 4374 7003 10 56 59 4 +91 165 5718 5802 75 282 7113 112 41 63 22 +94 20 1231916 2050003 20 4802 4917 19 53 59 7 +99 2 3665 36667 2 497 697 2 70 71 2 diff --git a/tests/queries/0_stateless/02995_index.sh b/tests/queries/0_stateless/02995_index_1.sh similarity index 98% rename from tests/queries/0_stateless/02995_index.sh rename to tests/queries/0_stateless/02995_index_1.sh index 5125d03904e..a5f1b30c2e8 100755 --- a/tests/queries/0_stateless/02995_index.sh +++ b/tests/queries/0_stateless/02995_index_1.sh @@ -26,7 +26,7 @@ DETACH TABLE test; ATTACH TABLE test; " -for i in {1..1000} +for i in {1..100} do echo " WITH ${i} AS try diff --git a/tests/queries/0_stateless/02995_index_10.reference b/tests/queries/0_stateless/02995_index_10.reference new file mode 100644 index 00000000000..bfa38d03801 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_10.reference @@ -0,0 +1,13 @@ +912 146 1322641 2238040 146 1366 6354 143 59 59 1 +913 82 5495 6870 78 350 565 67 24 43 15 +921 763 1580790 416881 763 6191 7131 509 63 64 2 +925 
318 2500952 5025 309 476 6114 182 32 56 21 +931 12 4277 4809 12 238 256 9 63 83 9 +942 954 1331 2228193 952 1121 5047 788 65 70 6 +948 14 1785593 2600431 14 6550 6598 13 34 49 9 +956 5 5755 6023 5 359 411 5 43 48 4 +963 4 3812 3835 4 444 537 4 47 53 4 +978 5 51632 58212 5 1127 1556 5 24 32 5 +980 53 47201 59744 53 1537 1625 36 41 49 9 +987 6033 2020131 763444 4306 256 792 1832 60 64 5 +993 4 1615159 1718339 4 1570 3093 4 62 63 2 diff --git a/tests/queries/0_stateless/02995_index_10.sh b/tests/queries/0_stateless/02995_index_10.sh new file mode 100755 index 00000000000..d72c7c72705 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_10.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {901..1000} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_2.reference b/tests/queries/0_stateless/02995_index_2.reference new file mode 100644 index 00000000000..098292a289f --- /dev/null +++ b/tests/queries/0_stateless/02995_index_2.reference @@ -0,0 +1,13 @@ +103 1 2446615 2446615 1 2498 2498 1 58 58 1 +106 72 6149 6699 67 527 826 40 61 61 1 +111 43 2273186 5272 43 492 4923 4 54 72 15 +120 3129 45117 6735 2868 1030 1625 561 59 64 6 +138 2 49243 49374 2 1428 1519 2 47 48 2 +143 100 23321 63639 100 1115 1624 88 51 51 1 +145 1 2447976 2447976 1 6173 6173 1 44 44 1 +153 16 13748 16881 16 1506 1636 16 54 68 9 +159 19952 1525336 7131 12957 1280 6163 2668 24 66 39 +171 5 15042 16698 5 1302 1608 5 65 65 1 +179 6264 1362341 2686 6244 2554 7132 2705 61 67 7 +192 1 1639623 1639623 1 3406 3406 1 32 32 1 +193 1 1429969 1429969 1 7131 7131 1 45 45 1 diff --git a/tests/queries/0_stateless/02995_index_2.sh b/tests/queries/0_stateless/02995_index_2.sh new file mode 100755 index 00000000000..e7451c7ee4b --- /dev/null +++ b/tests/queries/0_stateless/02995_index_2.sh @@ -0,0 +1,44 @@ 
+#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {101..200} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_3.reference b/tests/queries/0_stateless/02995_index_3.reference new file mode 100644 index 00000000000..9c2fca9fde6 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_3.reference @@ -0,0 +1,16 @@ +207 12 23057 32500 12 1491 1726 12 32 46 7 +221 5081 1366870 6649 3432 4527 5226 687 24 69 39 +228 73 12281 17929 71 1328 2034 63 49 71 18 +229 2 1617478 1723791 2 4590 5578 2 41 42 2 +230 3916 1332729 6949 3668 1330 4703 845 62 65 4 +238 25 2624456 2625673 24 2535 6465 25 58 75 14 +241 154 2554929 2616444 154 2626 7131 148 34 57 17 +248 276 15529 30987 274 1040 1222 136 37 79 27 +254 3018 33966 6635 2837 1057 1622 539 24 60 33 +255 20 1581774 1811334 20 6068 6301 18 33 57 10 +256 5 5145 6841 5 367 376 5 58 58 1 +270 2 2195579 2262119 2 7102 7123 2 33 34 2 +281 32 2379460 616215 32 6042 6086 23 53 64 12 +282 7 1292651 24244 7 1607 2455 6 46 55 5 +286 123 1521935 5269 123 3793 3940 81 40 66 22 +291 21 2419080 3567 21 297 4731 21 54 55 2 diff --git a/tests/queries/0_stateless/02995_index_3.sh b/tests/queries/0_stateless/02995_index_3.sh new file mode 100755 index 00000000000..506429e2696 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_3.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {201..300} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_4.reference b/tests/queries/0_stateless/02995_index_4.reference new file mode 100644 index 00000000000..deff7afaed3 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_4.reference @@ -0,0 +1,16 @@ +316 4 5221 5616 4 505 558 4 32 35 3 +319 232 56480 63033 230 1599 313 50 33 64 26 +327 15 51647 51894 14 1292 1585 14 47 57 7 +332 24 23484 54948 24 1609 1726 16 32 49 11 +333 1 14189 14189 1 1550 1550 1 63 63 1 +342 49 2579220 2622432 49 4626 6933 48 34 54 14 +344 1 6486 6486 1 509 509 1 24 24 1 +346 1987 53016 6735 1823 1334 174 294 26 62 32 +358 45 59058 60844 44 6746 722 40 57 84 15 +363 1198 1260033 2568811 1196 5710 5790 82 55 80 26 +384 150 2361175 476024 150 7008 7123 81 38 64 22 +387 277 5200 6553 252 243 521 130 65 65 1 +392 1877 1607428 2030850 1875 1416 7131 1379 54 66 13 +396 8181 1380803 6186 7920 545 798 1743 24 67 39 +398 3 5183 5213 2 291 352 3 53 59 3 +399 62 51494 59203 61 7073 754 42 55 78 18 diff --git a/tests/queries/0_stateless/02995_index_4.sh b/tests/queries/0_stateless/02995_index_4.sh new file mode 100755 index 00000000000..1a0458728f9 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_4.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {301..400} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_5.reference b/tests/queries/0_stateless/02995_index_5.reference new file mode 100644 index 00000000000..c5ab4d2417c --- /dev/null +++ b/tests/queries/0_stateless/02995_index_5.reference @@ -0,0 +1,9 @@ +412 2141 1360120 2189792 2136 2491 5658 1371 71 75 5 +413 2 2036037 2064917 2 3963 4666 2 43 45 2 +431 33 2302331 2348449 33 4425 6516 32 69 69 1 +447 59 25125 33094 59 1176 1817 56 53 58 6 +456 1 53157 53157 1 1556 1556 1 26 26 1 +462 5 5456 6280 5 348 4337 5 28 40 5 +472 1 1443716 1443716 1 6122 6122 1 42 42 1 +491 34 1066102 1183673 34 6606 6822 32 46 67 15 +498 896 2230163 3054 895 537 7131 714 24 59 28 diff --git a/tests/queries/0_stateless/02995_index_5.sh b/tests/queries/0_stateless/02995_index_5.sh new file mode 100755 index 00000000000..60c12a8146d --- /dev/null +++ b/tests/queries/0_stateless/02995_index_5.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {401..500} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_6.reference b/tests/queries/0_stateless/02995_index_6.reference new file mode 100644 index 00000000000..bac19179bb6 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_6.reference @@ -0,0 +1,14 @@ +504 108 12281 25180 108 1318 1784 94 55 66 12 +515 22 1588883 2640809 22 6554 6571 15 46 59 12 +518 1 37743 37743 1 1558 1558 1 72 72 1 +530 1 3033 3033 1 561 561 1 59 59 1 +532 26 5721 6355 25 549 665 14 44 50 7 +546 156 2577874 48517 156 1105 324 133 44 51 8 +554 12 1665194 2640066 12 1817 2951 12 57 57 1 +564 3865 2028049 2083433 3722 1115 985 2203 44 84 41 +566 4432 50605 57509 3217 1191 267 459 26 72 39 +567 8 5221 5893 7 333 558 8 27 35 4 +582 1172 1320619 2019743 1172 5819 7131 757 26 63 30 +584 43100 2500 5594 22561 134 4573 1660 48 84 37 +589 28 6046 6068 19 345 564 27 55 62 8 +595 139 1585165 1683606 138 2231 3598 132 54 84 28 diff --git a/tests/queries/0_stateless/02995_index_6.sh b/tests/queries/0_stateless/02995_index_6.sh new file mode 100755 index 00000000000..4936f73f36b --- /dev/null +++ b/tests/queries/0_stateless/02995_index_6.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {501..600} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_7.reference b/tests/queries/0_stateless/02995_index_7.reference new file mode 100644 index 00000000000..4f1d28ab37e --- /dev/null +++ b/tests/queries/0_stateless/02995_index_7.reference @@ -0,0 +1,9 @@ +615 3 1056081 1116230 3 5794 5796 2 59 62 3 +619 7 1543114 5241 7 2442 3105 7 41 45 3 +634 2722 1221058 4999 2686 2426 7131 1735 54 60 7 +635 237 2119333 4667 237 561 5999 176 49 60 12 +644 5 1774169 2056171 5 5591 6091 4 33 39 3 +647 8 51632 64403 8 1457 1624 8 26 34 5 +651 1325 1620565 6281 1301 528 792 815 62 63 2 +665 13 4598 4789 13 511 558 11 39 46 7 +679 1560 1613200 25940 1552 1569 3118 781 49 84 35 diff --git a/tests/queries/0_stateless/02995_index_7.sh b/tests/queries/0_stateless/02995_index_7.sh new file mode 100755 index 00000000000..26be310abce --- /dev/null +++ b/tests/queries/0_stateless/02995_index_7.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {601..700} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_8.reference b/tests/queries/0_stateless/02995_index_8.reference new file mode 100644 index 00000000000..62fbfb2af9a --- /dev/null +++ b/tests/queries/0_stateless/02995_index_8.reference @@ -0,0 +1,12 @@ +704 2 14226 15594 2 1086 1116 2 65 71 2 +715 25 1199352 3490 25 5036 5112 23 34 55 13 +716 1253 61989 6735 1050 1203 1625 397 52 65 14 +730 2584 5560 6170 634 2421 627 293 56 69 14 +736 8 1433153 4941 8 339 4594 8 28 36 5 +749 2 1326176 1339862 2 4339 6213 2 49 50 2 +753 1 53157 53157 1 1556 1556 1 26 26 1 +761 63 1443230 6881 63 3154 3204 26 56 73 14 +762 49 1449596 1968154 49 2437 3753 48 54 62 9 +775 35107 5330 769436 2471 447 6607 656 70 81 12 +789 1 1552458 1552458 1 2441 2441 1 62 62 1 +794 158 5585 6585 155 495 929 67 24 50 20 diff --git a/tests/queries/0_stateless/02995_index_8.sh b/tests/queries/0_stateless/02995_index_8.sh new file mode 100755 index 00000000000..8c2620b59fd --- /dev/null +++ b/tests/queries/0_stateless/02995_index_8.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {701..800} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_9.reference b/tests/queries/0_stateless/02995_index_9.reference new file mode 100644 index 00000000000..86c7be173bd --- /dev/null +++ b/tests/queries/0_stateless/02995_index_9.reference @@ -0,0 +1,9 @@ +839 9 29223 46530 9 1336 1465 9 52 52 1 +844 5 2377545 2377635 5 5129 6321 5 53 69 5 +846 50 2172273 2589295 50 1582 3053 48 64 68 5 +847 2577 56656 63658 1582 1444 838 474 26 63 33 +861 1333 5570 6909 839 457 489 37 33 70 34 +873 2360 1519811 50487 2248 1310 1784 316 60 68 9 +879 228 6704 6785 79 279 507 121 35 66 24 +889 5130 2070007 39692 5040 1151 6791 2606 44 66 23 +896 4 511246 859452 4 6554 6561 4 67 71 4 diff --git a/tests/queries/0_stateless/02995_index_9.sh b/tests/queries/0_stateless/02995_index_9.sh new file mode 100755 index 00000000000..76160c62aaa --- /dev/null +++ b/tests/queries/0_stateless/02995_index_9.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {801..900} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql b/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql index 988d7058f21..2b10c72ae1b 100644 --- a/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql +++ b/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql @@ -7,12 +7,14 @@ SETTINGS max_bytes_to_merge_at_max_space_in_pool = 80000, exclude_deleted_rows_f INSERT INTO lwd_merge SELECT number FROM numbers(10000); INSERT INTO lwd_merge SELECT number FROM numbers(10000, 10000); -OPTIMIZE TABLE lwd_merge; +SET optimize_throw_if_noop = 1; + +OPTIMIZE TABLE lwd_merge; -- { serverError CANNOT_ASSIGN_OPTIMIZE } SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; DELETE FROM lwd_merge WHERE id % 10 > 0; -OPTIMIZE TABLE lwd_merge; +OPTIMIZE TABLE lwd_merge; -- { serverError CANNOT_ASSIGN_OPTIMIZE } SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; ALTER TABLE lwd_merge MODIFY SETTING exclude_deleted_rows_for_part_size_in_merge = 1; diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference deleted file mode 100644 index b19d389d8d0..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference +++ /dev/null @@ -1,10 +0,0 @@ -before -rmt_master NewPart 0 1 -rmt_master MergeParts 0 1 -rmt_slave MergeParts 1 0 -rmt_slave DownloadPart 0 1 -after -rmt_master NewPart 0 1 -rmt_master MergeParts 0 1 -rmt_slave MergeParts 1 0 -rmt_slave DownloadPart 0 2 diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql deleted file mode 100644 index 548a8e5570a..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql +++ /dev/null @@ -1,35 
+0,0 @@ --- Tags: no-replicated-database, no-parallel, no-shared-merge-tree --- SMT: The merge process is completely different from RMT - -drop table if exists rmt_master; -drop table if exists rmt_slave; - -create table rmt_master (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'master') order by key settings always_fetch_merged_part=0; --- always_fetch_merged_part=1, consider this table as a "slave" -create table rmt_slave (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'slave') order by key settings always_fetch_merged_part=1; - -insert into rmt_master values (1); - -system sync replica rmt_master; -system sync replica rmt_slave; -system stop replicated sends rmt_master; -optimize table rmt_master final settings alter_sync=1, optimize_throw_if_noop=1; - -select sleep(3) format Null; - -system flush logs; -select 'before'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -system start replicated sends rmt_master; --- sleep few seconds to try rmt_slave to fetch the part and reflect this error --- in system.part_log -select sleep(3) format Null; -system sync replica rmt_slave; - -system flush logs; -select 'after'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -drop table rmt_master; -drop table rmt_slave; diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference deleted file mode 100644 index aac9e7527d1..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference +++ /dev/null @@ -1,10 +0,0 @@ -before -rmt_master NewPart 0 1 -rmt_master MutatePart 0 1 -rmt_slave DownloadPart 0 1 -rmt_slave MutatePart 1 0 -after -rmt_master NewPart 0 1 -rmt_master MutatePart 0 1 -rmt_slave DownloadPart 0 2 -rmt_slave MutatePart 1 0 diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql deleted file mode 100644 index d8b5ebb3148..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql +++ /dev/null @@ -1,41 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-shared-merge-tree --- SMT: The merge process is completely different from RMT - -drop table if exists rmt_master; -drop table if exists rmt_slave; - -create table rmt_master (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'master') order by tuple() settings always_fetch_merged_part=0, old_parts_lifetime=600; --- prefer_fetch_merged_part_*_threshold=0, consider this table as a "slave" -create table rmt_slave (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'slave') order by tuple() settings prefer_fetch_merged_part_time_threshold=0, prefer_fetch_merged_part_size_threshold=0, old_parts_lifetime=600; - -insert into rmt_master values (1); - -system sync replica rmt_master; -system sync replica rmt_slave; -system stop replicated sends rmt_master; -system stop pulling replication log rmt_slave; -alter table rmt_master update key=key+100 where 1 settings alter_sync=1; - --- first we need to make the rmt_master execute mutation so that it will have --- the part, and rmt_slave will consider it instead of performing mutation on --- it's own, otherwise prefer_fetch_merged_part_*_threshold will be simply ignored -select sleep(3) format Null; -system 
start pulling replication log rmt_slave; --- and sleep few more seconds to try rmt_slave to fetch the part and reflect --- this error in system.part_log -select sleep(3) format Null; - -system flush logs; -select 'before'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -system start replicated sends rmt_master; -select sleep(3) format Null; -system sync replica rmt_slave; - -system flush logs; -select 'after'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -drop table rmt_master; -drop table rmt_slave; diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python new file mode 100644 index 00000000000..bfaa496805f --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication.python @@ -0,0 +1,659 @@ +#!/usr/bin/env python3 + +import os +import sys +import argparse +import string + + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) + + +def __format(template, **params): + field_names = [v[1] for v in string.Formatter().parse(template) if v[1] is not None] + kv_args = {} + for field in field_names: + if field in params: + kv_args[field] = params[field] + else: + kv_args[field] = "" + + return template.format(**kv_args) + + +def instance_create_statement( + table_name, + table_columns, + table_keys, + table_engine, + with_deduplication, + no_merges=True, +): + template = """ + CREATE TABLE {table_name} + {table_columns} + ENGINE = {table_engine} + ORDER BY {table_keys} + {table_settings}; + {table_no_merges} + """ + + params = dict() + params["table_name"] = table_name + params["table_columns"] = table_columns + params["table_keys"] = table_keys + params["table_no_merges"] = f"SYSTEM STOP MERGES {table_name};" if no_merges else "" + params["table_engine"] = ( + "MergeTree()" + if table_engine == "MergeTree" + else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" + ) + + deduplication_window_setting_name = ( + "non_replicated_deduplication_window" + if table_engine == "MergeTree" + else "replicated_deduplication_window" + ) + deduplication_window_setting_value = 1000 if with_deduplication else 0 + + settings = list() + settings += [ + f"{deduplication_window_setting_name}={deduplication_window_setting_value}" + ] + params["table_settings"] = "SETTINGS " + ",".join(settings) + + return __format(template, **params) + + +def instance_insert_statement( + table_name, count, insert_method, insert_unique_blocks, use_insert_token +): + insert_settings = ( + "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" + ) + + if insert_method == "InsertSelect": + template = """ + INSERT INTO {table_name} + SELECT {insert_columns} + FROM numbers({count}) {insert_settings}; + """ + return __format( + template, + table_name=table_name, + count=count, + insert_columns=( + "'src_4', 4" + if not insert_unique_blocks + else "'src_' || toString(number), number" + ), + insert_settings=insert_settings, + ) + + else: + template = """ + INSERT INTO {table_name} + {insert_settings} VALUES {insert_values}; + """ + + values = [] + for i in range(count): + values += ( + [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] + ) + insert_values = ", ".join(values) + + return __format( + template, + table_name=table_name, + 
insert_settings=insert_settings, + insert_values=insert_values, + ) + + +def get_drop_tables_statements(tables): + return "".join( + [f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]] + ) + + +def get_logs_statement(args): + if args.get_logs: + return "SET send_logs_level='test';" + return "" + + +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ("yes", "true", "t", "y", "1"): + return True + elif v.lower() in ("no", "false", "f", "n", "0"): + return False + else: + raise argparse.ArgumentTypeError("Boolean value expected.") + + +class ArgsFactory: + def __init__(self, parser): + self.__parser = parser + + def add_opt_engine(self): + self.__parser.add_argument( + "--table-engine", + choices=["ReplicatedMergeTree", "MergeTree"], + default="MergeTree", + ) + + def add_opt_user_token(self): + self.__parser.add_argument( + "--use-insert-token", type=str2bool, nargs="?", const=True, default=False + ) + + def add_opt_single_thread(self): + self.__parser.add_argument( + "--single-thread", type=str2bool, nargs="?", const=True, default=True + ) + + def add_opt_dedup_src(self): + self.__parser.add_argument( + "--deduplicate-src-table", + type=str2bool, + nargs="?", + const=True, + default=True, + ) + + def add_opt_dedup_dst(self): + self.__parser.add_argument( + "--deduplicate-dst-table", + type=str2bool, + nargs="?", + const=True, + default=True, + ) + + def add_opt_get_logs(self): + self.__parser.add_argument( + "--get-logs", type=str2bool, nargs="?", const=True, default=False + ) + + def add_opt_uniq_blocks(self): + self.__parser.add_argument( + "--insert-unique-blocks", type=str2bool, nargs="?", const=True, default=True + ) + + def add_opt_insert_method(self): + self.__parser.add_argument( + "--insert-method", + choices=["InsertSelect", "InsertValues"], + default="InsertSelect", + ) + + def add_all(self): + self.add_opt_engine() + self.add_opt_user_token() + self.add_opt_single_thread() + self.add_opt_dedup_src() + self.add_opt_dedup_dst() + self.add_opt_get_logs() + self.add_opt_insert_method() + self.add_opt_uniq_blocks() + + +def test_insert_several_blocks(parser): + ArgsFactory(parser).add_all() + + def calle(args): + create_table_a_b_statement = instance_create_statement( + table_name="table_a_b", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_when_b_even_statement = instance_create_statement( + table_name="table_when_b_even", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + create_mv_statement = """ + CREATE MATERIALIZED VIEW mv_b_even + TO table_when_b_even + AS + SELECT a, b + FROM table_a_b + WHERE b % 2 = 0; + """ + + drop_tables_statements = get_drop_tables_statements( + ["table_a_b", "table_when_b_even", "mv_b_even"] + ) + + insert_statement = instance_insert_statement( + "table_a_b", + 10, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, + ) + + print_details_statements = f""" + SELECT 'table_a_b'; + SELECT 'count', count() FROM table_a_b; + {"" if not args.get_logs else "SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part;"} + + SELECT 'table_when_b_even'; + SELECT 'count', count() FROM table_when_b_even; + {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} + """ + + if args.insert_unique_blocks: + 
assert_first_insert_statements = f""" + SELECT throwIf( count() != 10 ) + FROM table_a_b; + SELECT throwIf( count() != 5 ) + FROM table_when_b_even; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {5 if args.deduplicate_dst_table else 10} ) + FROM table_when_b_even; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = """ + SELECT throwIf( count() != 10 ) + FROM table_a_b; + SELECT throwIf( count() != 10 ) + FROM table_when_b_even; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) + FROM table_when_b_even; + """ + else: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 10} ) + FROM table_when_b_even; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 20} ) + FROM table_when_b_even; + """ + + script = f""" + {get_logs_statement(args)} + + SET max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + {create_table_a_b_statement} + + {create_table_when_b_even_statement} + + {create_mv_statement} + + -- first insert + {insert_statement} + + {print_details_statements} + + {assert_first_insert_statements} + + -- second insert, it is retry + {insert_statement} + + {print_details_statements} + + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def test_mv_generates_several_blocks(parser): + ArgsFactory(parser).add_all() + + def calle(args): + tables = [ + "table_for_join_with", + "table_a_b", + "table_when_b_even_and_joined", + "mv_b_even", + ] + drop_tables_statements = get_drop_tables_statements(tables) + + details_print_for_table_for_join_with = "" + if args.get_logs: + details_print_for_table_for_join_with = """ + SELECT 'table_for_join_with'; + SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; + """ + + create_table_a_b_statement = instance_create_statement( + table_name="table_a_b", + table_columns="(a_src String, b UInt64)", + table_keys="(a_src, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_when_b_even_and_joined_statement = instance_create_statement( + table_name="table_when_b_even_and_joined", + table_columns="(a_src String, a_join String, b UInt64)", + table_keys="(a_src, a_join, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + insert_statement = instance_insert_statement( + "table_a_b", + 5, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, + ) + + details_print_statements = f""" + SELECT 'table_a_b'; + SELECT 'count', count() FROM table_a_b; + + SELECT 'table_when_b_even_and_joined'; + SELECT 'count', count() FROM table_when_b_even_and_joined; + {"" if not args.get_logs else "SELECT _part, 
a_src, a_join, b FROM table_when_b_even_and_joined ORDER BY _part;"} + """ + + if args.insert_unique_blocks: + assert_first_insert_statements = f""" + SELECT throwIf( count() != 5 ) + FROM table_a_b; + + SELECT throwIf( count() != 9 ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 18} ) + FROM table_when_b_even_and_joined; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) + FROM table_a_b; + + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 10} ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) + FROM table_when_b_even_and_joined; + """ + else: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) + FROM table_a_b; + + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 10} ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 20} ) + FROM table_when_b_even_and_joined; + """ + + script = f""" + {get_logs_statement(args)} + + SET max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + CREATE TABLE table_for_join_with + (a_join String, b UInt64) + ENGINE = MergeTree() + ORDER BY (a_join, b); + INSERT INTO table_for_join_with + SELECT 'joined_' || toString(number), number + FROM numbers(1); + {details_print_for_table_for_join_with} + + {create_table_a_b_statement} + SYSTEM STOP MERGES table_a_b; + + {create_table_when_b_even_and_joined_statement} + SYSTEM STOP MERGES table_when_b_even_and_joined; + + CREATE MATERIALIZED VIEW mv_b_even + TO table_when_b_even_and_joined + AS + SELECT a_src, a_join, table_for_join_with.b as b + FROM table_a_b + FULL OUTER JOIN table_for_join_with + ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 + ORDER BY a_src, a_join, b; + + -- first insert + {insert_statement} + + {details_print_statements} + + -- first assertion + {assert_first_insert_statements} + + -- second insert + {insert_statement} + + {details_print_statements} + + -- second assertion + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def test_several_mv_into_one_table(parser): + ArgsFactory(parser).add_all() + + def calle(args): + tables = ["table_src", "table_dst", "mv_b_even", "mv_b_even_even"] + drop_tables_statements = get_drop_tables_statements(tables) + + create_table_src_statement = instance_create_statement( + table_name="table_src", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_dst_statement = instance_create_statement( + table_name="table_dst", + 
table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + insert_statement = instance_insert_statement( + "table_src", + 8, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, + ) + + details_print_statements = f""" + SELECT 'table_src count', count() FROM table_src; + + SELECT 'table_dst count', count() FROM table_dst; + {"" if not args.get_logs else "SELECT _part, count() FROM table_dst GROUP BY _part ORDER BY _part;"} + """ + + if args.insert_unique_blocks: + assert_first_insert_statements = f""" + SELECT throwIf( count() != 8 ) + FROM table_src; + + SELECT throwIf( count() != 6 ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {6 if args.deduplicate_dst_table else 12} ) + FROM table_dst; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 8} ) + FROM table_src; + + SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 16} ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 32} ) + FROM table_dst; + """ + else: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 8} ) + FROM table_src; + + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 16} ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 32} ) + FROM table_dst; + """ + + script = f""" + {get_logs_statement(args)} + + SET max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + {create_table_src_statement} + + {create_table_dst_statement} + + CREATE MATERIALIZED VIEW mv_b_even + TO table_dst + AS + SELECT a, b + FROM table_src + WHERE b % 2 = 0; + + CREATE MATERIALIZED VIEW mv_b_even_even + TO table_dst + AS + SELECT a, b + FROM table_src + WHERE b % 4 = 0; + + -- first insert + {insert_statement} + + {details_print_statements} + + {assert_first_insert_statements} + + -- second insert, retry + {insert_statement} + + {details_print_statements} + + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def parse_args(): + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(dest="test") + test_insert_several_blocks( + subparsers.add_parser("insert_several_blocks_into_table") + ) + test_mv_generates_several_blocks( + subparsers.add_parser("mv_generates_several_blocks") + ) + test_several_mv_into_one_table(subparsers.add_parser("several_mv_into_one_table")) + args = parser.parse_args() + if args.test is None: + parser.print_help() + return args + + +def main(): + args = parse_args() + if args.test is not None: + args.func(args) + + +if __name__ == "__main__": + main() diff --git 
a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference
new file mode 100644
index 00000000000..4893274c1cd
--- /dev/null
+++ b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference
@@ -0,0 +1,41 @@
+Different materialized views insert equal data into one underlying table.
+first attempt
+from dst 1 A all_1_1_0
+from mv_dst 0 A all_1_1_0
+from mv_dst 0 A all_2_2_0
+second attempt
+from dst 1 A all_1_1_0
+from mv_dst 0 A all_1_1_0
+from mv_dst 0 A all_2_2_0
+Different insert operations generate the same data after transformation in the underlying table of the materialized view.
+first attempt
+from dst 1 A all_1_1_0
+from mv_dst 0 A all_1_1_0
+second attempt
+from dst 1 A all_1_1_0
+from dst 2 A all_2_2_0
+from mv_dst 0 A all_1_1_0
+from mv_dst 0 A all_2_2_0
+Identical blocks in insertion with `insert_deduplication_token`
+first attempt
+from dst 0 A all_1_1_0
+from dst 0 A all_2_2_0
+second attempt
+from dst 0 A all_1_1_0
+from dst 0 A all_2_2_0
+third attempt
+from dst 0 A all_1_1_0
+from dst 0 A all_2_2_0
+Identical blocks in insertion
+from dst 0 A all_1_1_0
+Identical blocks after materialized view`s transformation
+first attempt
+from dst 1 B all_1_1_0
+from dst 2 B all_2_2_0
+from mv_dst 0 B all_1_1_0
+from mv_dst 0 B all_2_2_0
+second attempt
+from dst 1 B all_1_1_0
+from dst 2 B all_2_2_0
+from mv_dst 0 B all_1_1_0
+from mv_dst 0 B all_2_2_0
diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql
new file mode 100644
index 00000000000..7927a6b1edf
--- /dev/null
+++ b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql
@@ -0,0 +1,331 @@
+-- #########
+select 'Different materialized views insert equal data into one underlying table.';
+
+DROP TABLE IF EXISTS dst;
+DROP TABLE IF EXISTS mv_dst;
+DROP TABLE IF EXISTS mv_first;
+DROP TABLE IF EXISTS mv_second;
+
+CREATE TABLE dst
+(
+    `key` Int64,
+    `value` String
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+SETTINGS non_replicated_deduplication_window=1000;
+
+CREATE TABLE mv_dst
+(
+    `key` Int64,
+    `value` String
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+SETTINGS non_replicated_deduplication_window=1000;
+
+CREATE MATERIALIZED VIEW mv_first
+TO mv_dst
+AS SELECT
+    0 AS key,
+    value AS value
+FROM dst;
+
+CREATE MATERIALIZED VIEW mv_second
+TO mv_dst
+AS SELECT
+    0 AS key,
+    value AS value
+FROM dst;
+
+SET deduplicate_blocks_in_dependent_materialized_views=1;
+
+select 'first attempt';
+
+INSERT INTO dst VALUES (1, 'A');
+
+SELECT
+    'from dst',
+    *,
+    _part
+FROM dst
+ORDER by all;
+
+SELECT
+    'from mv_dst',
+    *,
+    _part
+FROM mv_dst
+ORDER by all;
+
+select 'second attempt';
+
+INSERT INTO dst VALUES (1, 'A');
+
+SELECT
+    'from dst',
+    *,
+    _part
+FROM dst
+ORDER by all;
+
+SELECT
+    'from mv_dst',
+    *,
+    _part
+FROM mv_dst
+ORDER by all;
+
+DROP TABLE mv_second;
+DROP TABLE mv_first;
+DROP TABLE mv_dst;
+DROP TABLE dst;
+
+
+-- #########
+select 'Different insert operations generate the same data after transformation in the underlying table of the materialized view.';
+
+DROP TABLE IF EXISTS dst;
+DROP TABLE IF EXISTS mv_dst;
+
+CREATE TABLE dst
+(
+    `key` Int64,
+    `value` String
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+SETTINGS non_replicated_deduplication_window=1000;
+
+CREATE MATERIALIZED VIEW mv_dst
+(
+    `key` Int64,
+    `value` String
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+SETTINGS non_replicated_deduplication_window=1000
+AS SELECT
+    
0 AS key,
+    value AS value
+FROM dst;
+
+SET deduplicate_blocks_in_dependent_materialized_views=1;
+
+select 'first attempt';
+
+INSERT INTO dst VALUES (1, 'A');
+
+SELECT
+    'from dst',
+    *,
+    _part
+FROM dst
+ORDER by all;
+
+SELECT
+    'from mv_dst',
+    *,
+    _part
+FROM mv_dst
+ORDER by all;
+
+select 'second attempt';
+
+INSERT INTO dst VALUES (2, 'A');
+
+SELECT
+    'from dst',
+    *,
+    _part
+FROM dst
+ORDER by all;
+
+SELECT
+    'from mv_dst',
+    *,
+    _part
+FROM mv_dst
+ORDER by all;
+
+DROP TABLE mv_dst;
+DROP TABLE dst;
+
+
+-- #########
+select 'Identical blocks in insertion with `insert_deduplication_token`';
+
+DROP TABLE IF EXISTS dst;
+
+CREATE TABLE dst
+(
+    `key` Int64,
+    `value` String
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+SETTINGS non_replicated_deduplication_window=1000;
+
+SET max_block_size=1;
+SET min_insert_block_size_rows=0;
+SET min_insert_block_size_bytes=0;
+
+select 'first attempt';
+
+INSERT INTO dst SELECT
+    0 AS key,
+    'A' AS value
+FROM numbers(2)
+SETTINGS insert_deduplication_token='some_user_token';
+
+SELECT
+    'from dst',
+    *,
+    _part
+FROM dst
+ORDER by all;
+
+select 'second attempt';
+
+INSERT INTO dst SELECT
+    0 AS key,
+    'A' AS value
+FROM numbers(2)
+SETTINGS insert_deduplication_token='some_user_token';
+
+SELECT
+    'from dst',
+    *,
+    _part
+FROM dst
+ORDER by all;
+
+select 'third attempt';
+
+INSERT INTO dst SELECT
+    1 AS key,
+    'b' AS value
+FROM numbers(2)
+SETTINGS insert_deduplication_token='some_user_token';
+
+SELECT
+    'from dst',
+    *,
+    _part
+FROM dst
+ORDER by all;
+
+DROP TABLE dst;
+
+
+-- #########
+select 'Identical blocks in insertion';
+
+DROP TABLE IF EXISTS dst;
+
+CREATE TABLE dst
+(
+    `key` Int64,
+    `value` String
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+SETTINGS non_replicated_deduplication_window=1000;
+
+SET max_block_size=1;
+SET min_insert_block_size_rows=0;
+SET min_insert_block_size_bytes=0;
+
+INSERT INTO dst SELECT
+    0 AS key,
+    'A' AS value
+FROM numbers(2);
+
+SELECT
+    'from dst',
+    *,
+    _part
+FROM dst
+ORDER by all;
+
+DROP TABLE dst;
+
+
+-- #########
+select 'Identical blocks after materialized view`s transformation';
+
+DROP TABLE IF EXISTS dst;
+DROP TABLE IF EXISTS mv_dst;
+
+CREATE TABLE dst
+(
+    `key` Int64,
+    `value` String
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+SETTINGS non_replicated_deduplication_window=1000;
+
+CREATE MATERIALIZED VIEW mv_dst
+(
+    `key` Int64,
+    `value` String
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+SETTINGS non_replicated_deduplication_window=1000
+AS SELECT
+    0 AS key,
+    value AS value
+FROM dst;
+
+SET max_block_size=1;
+SET min_insert_block_size_rows=0;
+SET min_insert_block_size_bytes=0;
+
+SET deduplicate_blocks_in_dependent_materialized_views=1;
+
+select 'first attempt';
+
+INSERT INTO dst SELECT
+    number + 1 AS key,
+    IF(key = 0, 'A', 'B') AS value
+FROM numbers(2);
+
+SELECT
+    'from dst',
+    *,
+    _part
+FROM dst
+ORDER by all;
+
+SELECT
+    'from mv_dst',
+    *,
+    _part
+FROM mv_dst
+ORDER by all;
+
+select 'second attempt';
+
+INSERT INTO dst SELECT
+    number + 1 AS key,
+    IF(key = 0, 'A', 'B') AS value
+FROM numbers(2);
+
+SELECT
+    'from dst',
+    *,
+    _part
+FROM dst
+ORDER by all;
+
+SELECT
+    'from mv_dst',
+    *,
+    _part
+FROM mv_dst
+ORDER by all;
+
+DROP TABLE mv_dst;
+DROP TABLE dst;
diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference
new file mode 100644
index 00000000000..c82a6eaa213
--- /dev/null
+++ 
b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference
@@ -0,0 +1,35 @@
+no user deduplication token
+partitioned_table is deduplicated because deduplication works in the scope of one partition:
+1 A
+1 D
+2 B
+2 C
+mv_table is not deduplicated because the inserted blocks were different:
+1 A
+1 A
+1 D
+2 B
+2 B
+2 C
+with user deduplication token
+partitioned_table is not deduplicated because different tokens:
+1 A
+1 A
+1 D
+2 B
+2 B
+2 C
+mv_table is not deduplicated because different tokens:
+1 A
+1 A
+1 D
+2 B
+2 B
+2 C
+with incorrect usage of user deduplication token
+partitioned_table is deduplicated because equal tokens:
+1 A
+2 B
+mv_table is deduplicated because equal tokens:
+1 A
+2 B
diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql
new file mode 100644
index 00000000000..2eb931f7f73
--- /dev/null
+++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql
@@ -0,0 +1,83 @@
+DROP TABLE IF EXISTS partitioned_table;
+DROP TABLE IF EXISTS mv_table;
+
+
+SET deduplicate_blocks_in_dependent_materialized_views = 1;
+
+
+SELECT 'no user deduplication token';
+
+CREATE TABLE partitioned_table
+    (key Int64, value String)
+    ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}')
+    partition by key % 10
+    order by tuple();
+
+CREATE MATERIALIZED VIEW mv_table (key Int64, value String)
+    ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}')
+    ORDER BY tuple()
+    AS SELECT key, value FROM partitioned_table;
+
+INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'B');
+INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'C');
+INSERT INTO partitioned_table VALUES (1, 'D'), (2, 'B');
+
+SELECT 'partitioned_table is deduplicated because deduplication works in the scope of one partition:';
+SELECT * FROM partitioned_table ORDER BY ALL;
+SELECT 'mv_table is not deduplicated because the inserted blocks were different:';
+SELECT * FROM mv_table ORDER BY ALL;
+
+DROP TABLE partitioned_table;
+DROP TABLE mv_table;
+
+
+SELECT 'with user deduplication token';
+
+CREATE TABLE partitioned_table
+    (key Int64, value String)
+    ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}')
+    partition by key % 10
+    order by tuple();
+
+CREATE MATERIALIZED VIEW mv_table (key Int64, value String)
+    ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}')
+    ORDER BY tuple()
+    AS SELECT key, value FROM partitioned_table;
+
+INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_1' VALUES (1, 'A'), (2, 'B');
+INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_2' VALUES (1, 'A'), (2, 'C');
+INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_3' VALUES (1, 'D'), (2, 'B');
+
+SELECT 'partitioned_table is not deduplicated because different tokens:';
+SELECT * FROM partitioned_table ORDER BY ALL;
+SELECT 'mv_table is not deduplicated because different tokens:';
+SELECT * FROM mv_table ORDER BY ALL;
+
+DROP TABLE partitioned_table;
+DROP TABLE mv_table;
+
+
+SELECT 'with incorrect usage of user deduplication token';
+
+CREATE TABLE partitioned_table
+    (key Int64, value String)
+    ENGINE = 
ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') + partition by key % 10 + order by tuple(); + +CREATE MATERIALIZED VIEW mv_table (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') + ORDER BY tuple() + AS SELECT key, value FROM partitioned_table; + +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'B'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'C'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'D'), (2, 'B'); + +SELECT 'partitioned_table is deduplicated because equal tokens:'; +SELECT * FROM partitioned_table ORDER BY ALL; +SELECT 'mv_table is deduplicated because equal tokens:'; +SELECT * FROM mv_table ORDER BY ALL; + +DROP TABLE partitioned_table; +DROP TABLE mv_table; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference new file mode 100644 index 00000000000..bf900aa84d2 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True 
single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 19: insert_method=InsertSelect 
engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + 
+Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 
+table_when_b_even +count 10 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even 
+count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh new file mode 100755 index 00000000000..ef4866cb69e --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel, no-azure-blob-storage, no-s3-storage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +ENGINE="MergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference new file mode 100644 index 00000000000..c815324b455 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test 
case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True 
deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 
+0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even 
+count 10 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False 
deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh new file mode 100755 index 00000000000..dd4b1f7cec0 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel, no-azure-blob-storage, no-s3-storage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference new file mode 100644 index 00000000000..6e76ec46aa8 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True 
+table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 
+table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined 
+count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh new file mode 100755 index 00000000000..33386c76edb --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel, no-azure-blob-storage, no-s3-storage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +ENGINE="MergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference new file mode 100644 index 00000000000..a25e8713c61 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 
+0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 37: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 
+table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False 
insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh new file mode 100755 index 00000000000..b66ef83abf2 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel, no-azure-blob-storage, no-s3-storage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 20: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_random_setttings.reference b/tests/queries/0_stateless/03008_deduplication_random_setttings.reference new file mode 100644 index 00000000000..8e7e24a7dd2 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_random_setttings.reference @@ -0,0 +1,3 @@ +insert_several_blocks_into_table OK +mv_generates_several_blocks OK +several_mv_into_one_table OK diff --git a/tests/queries/0_stateless/03008_deduplication_random_setttings.sh b/tests/queries/0_stateless/03008_deduplication_random_setttings.sh new file mode 100755 index 00000000000..e9f59138177 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_random_setttings.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +engine_options=("ReplicatedMergeTree" "MergeTree") +engine=${engine_options[ $RANDOM % ${#engine_options[@]} ]} + +insert_method_option=("InsertSelect" "InsertValues") +insert_method=${insert_method_option[ $RANDOM % ${#insert_method_option[@]} ]} + +use_insert_token_option=("True" "False") +use_insert_token=${use_insert_token_option[ $RANDOM % ${#use_insert_token_option[@]} ]} + +single_thread_options=("True" "False") +single_thread=${single_thread_options[ $RANDOM % ${#single_thread_options[@]} ]} + +deduplicate_src_table_options=("True" "False") +deduplicate_src_table=${deduplicate_src_table_options[ $RANDOM % ${#deduplicate_src_table_options[@]} ]} + +deduplicate_dst_table_options=("True" "False") +deduplicate_dst_table=${deduplicate_dst_table_options[ $RANDOM % ${#deduplicate_dst_table_options[@]} ]} + +insert_unique_blocks_options=("True" "False") +insert_unique_blocks=${insert_unique_blocks_options[ $RANDOM % ${#insert_unique_blocks_options[@]} ]} + + +THIS_RUN="Test case:" +THIS_RUN+=" insert_method=$insert_method" +THIS_RUN+=" engine=$engine" +THIS_RUN+=" use_insert_token=$use_insert_token" +THIS_RUN+=" single_thread=$single_thread" +THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" +THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" +THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + +$CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) +" 1>/dev/null 2>&1 && echo 'insert_several_blocks_into_table OK' || echo "FAIL: insert_several_blocks_into_table ${THIS_RUN}" + +$CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) +" 1>/dev/null 2>&1 && echo 'mv_generates_several_blocks OK' || echo "FAIL: mv_generates_several_blocks ${THIS_RUN}" + +$CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $engine \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) +" 1>/dev/null 2>&1 && echo 'several_mv_into_one_table OK' || echo "FAIL: several_mv_into_one_table ${THIS_RUN}" + diff --git a/tests/queries/0_stateless/03008_deduplication_remote_insert_select.reference b/tests/queries/0_stateless/03008_deduplication_remote_insert_select.reference new file mode 100644 index 00000000000..9dd45974e40 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_remote_insert_select.reference @@ -0,0 +1,36 @@ +-- { echoOn } +SELECT count() FROM src; +100 +SELECT a, sum(b), uniq(b), FROM src GROUP BY a ORDER BY a; +0 450 10 +1 460 10 +2 470 10 +3 480 10 +4 490 
10 +5 500 10 +6 510 10 +7 520 10 +8 530 10 +9 540 10 +SELECT count() FROM remote('127.0.0.{1..2}', currentDatabase(), src); +200 +-- { echoOn } +INSERT INTO dst_null + SELECT a, b FROM src; +SELECT + a, + sumMerge(sum_b) AS sum_b, + uniqMerge(uniq_b) AS uniq_b +FROM mv_dst +GROUP BY a +ORDER BY a; +0 450 10 +1 460 10 +2 470 10 +3 480 10 +4 490 10 +5 500 10 +6 510 10 +7 520 10 +8 530 10 +9 540 10 diff --git a/tests/queries/0_stateless/03008_deduplication_remote_insert_select.sql b/tests/queries/0_stateless/03008_deduplication_remote_insert_select.sql new file mode 100644 index 00000000000..c8e092822da --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_remote_insert_select.sql @@ -0,0 +1,51 @@ +DROP TABLE IF EXISTS src; + +CREATE TABLE src (a UInt64, b UInt64) + ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_remote_insert_select/src', '{replica}') + ORDER BY tuple(); + +INSERT INTO src SELECT number % 10 as a, number as b FROM numbers(100); + +SET allow_experimental_parallel_reading_from_replicas=1; +SET max_parallel_replicas=3; +SET parallel_replicas_for_non_replicated_merge_tree=1; +SET cluster_for_parallel_replicas='parallel_replicas'; + +-- { echoOn } +SELECT count() FROM src; +SELECT a, sum(b), uniq(b), FROM src GROUP BY a ORDER BY a; +SELECT count() FROM remote('127.0.0.{1..2}', currentDatabase(), src); +-- { echoOff } + +DROP TABLE IF EXISTS dst_null; +CREATE TABLE dst_null(a UInt64, b UInt64) + ENGINE = Null; + +DROP TABLE IF EXISTS mv_dst; +CREATE MATERIALIZED VIEW mv_dst + ENGINE = AggregatingMergeTree() + ORDER BY a + AS SELECT + a, + sumState(b) AS sum_b, + uniqState(b) AS uniq_b + FROM dst_null + GROUP BY a; + +-- { echoOn } +INSERT INTO dst_null + SELECT a, b FROM src; + +SELECT + a, + sumMerge(sum_b) AS sum_b, + uniqMerge(uniq_b) AS uniq_b +FROM mv_dst +GROUP BY a +ORDER BY a; +-- { echoOff } + +DROP TABLE src; +DROP TABLE mv_dst; +DROP TABLE dst_null; + diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference new file mode 100644 index 00000000000..b6a3e0175a7 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference @@ -0,0 +1,706 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 17: insert_method=InsertSelect 
engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst 
count 2 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 
+table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh new file mode 100755 index 00000000000..f9e1838f491 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel, no-azure-blob-storage, no-s3-storage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +ENGINE="MergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference new file mode 100644 index 00000000000..1921103f49e --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference @@ -0,0 +1,706 @@ + +Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 
+table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 30: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 
8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh new file mode 100755 index 00000000000..698e70d4064 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel, no-azure-blob-storage, no-s3-storage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_wrong_mv.reference b/tests/queries/0_stateless/03008_deduplication_wrong_mv.reference new file mode 100644 index 00000000000..89b4f324d5e --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_wrong_mv.reference @@ -0,0 +1,15 @@ +-- { echo ON } +CREATE TABLE src (x UInt8) ENGINE = Memory; +CREATE TABLE dst (x UInt8) ENGINE = Memory; +CREATE MATERIALIZED VIEW mv1 TO dst AS SELECT * FROM src; +INSERT INTO src VALUES (0); +SELECT * from dst; +0 +TRUNCATE TABLE dst; +--DROP TABLE src SYNC; +--CREATE TABLE src (y String) ENGINE = MergeTree order by tuple(); +ALTER TABLE src ADD COLUMN y UInt8; +ALTER TABLE src DROP COLUMN x; +INSERT INTO src VALUES (0); +SELECT * from dst; +0 diff --git a/tests/queries/0_stateless/03008_deduplication_wrong_mv.sql b/tests/queries/0_stateless/03008_deduplication_wrong_mv.sql new file mode 100644 index 00000000000..dd74c38ab17 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_wrong_mv.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS mv; +DROP TABLE IF EXISTS src; +DROP TABLE IF EXISTS dst; + +-- { echo ON } +CREATE TABLE src (x UInt8) ENGINE = Memory; +CREATE TABLE dst (x UInt8) ENGINE = Memory; +CREATE MATERIALIZED VIEW mv1 TO dst AS SELECT * FROM src; + +INSERT INTO src VALUES (0); +SELECT * from dst; + +TRUNCATE TABLE dst; + +--DROP TABLE src SYNC; +--CREATE TABLE src (y String) ENGINE = MergeTree order by tuple(); +ALTER TABLE src ADD COLUMN y UInt8; +ALTER TABLE src DROP COLUMN x; + +INSERT INTO src VALUES (0); +SELECT * from dst; diff --git a/tests/queries/0_stateless/03035_max_insert_threads_support.sh b/tests/queries/0_stateless/03035_max_insert_threads_support.sh index 1e6bfb414d8..cedb651a430 100755 --- a/tests/queries/0_stateless/03035_max_insert_threads_support.sh +++ b/tests/queries/0_stateless/03035_max_insert_threads_support.sh @@ -8,7 +8,7 @@ 
DATA_FILE="data_$CLICKHOUSE_TEST_UNIQUE_NAME.csv" $CLICKHOUSE_CLIENT --max_insert_threads=4 --query=" EXPLAIN PIPELINE INSERT INTO FUNCTION file('$DATA_FILE') SELECT * FROM numbers_mt(1000000) ORDER BY number DESC -" | grep -o MaterializingTransform | wc -l +" | grep -o StorageFileSink | wc -l DATA_FILE_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path from file('$DATA_FILE', 'One')") rm $DATA_FILE_PATH diff --git a/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.sql b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.sql index 3893ec41c0e..d94a68aa4d8 100644 --- a/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.sql +++ b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.sql @@ -7,13 +7,13 @@ CREATE TABLE test_table_1 ( id UInt64, value String -) ENGINE=MergeTree ORDER BY id; +) ENGINE=MergeTree ORDER BY id SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE test_table_2 ( id UInt64, value String -) ENGINE=MergeTree ORDER BY id; +) ENGINE=MergeTree ORDER BY id SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO test_table_1 SELECT number, number FROM numbers(10); INSERT INTO test_table_2 SELECT number, number FROM numbers(10); diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference deleted file mode 100644 index 65034647775..00000000000 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges.reference +++ /dev/null @@ -1,92 +0,0 @@ -MergeTree compact + horizontal merge -test -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None -MergeTree wide + horizontal merge -test -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None -MergeTree compact + vertical merge -test -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a 
Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None -MergeTree wide + vertical merge -test -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh deleted file mode 100755 index 5d8eac082cf..00000000000 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. 
"$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" - - -function test() -{ - echo "test" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" - $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" - - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - - $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" - $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" - - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree compact + vertical merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide + vertical merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" -test -$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference new file mode 100644 index 00000000000..4be740f6050 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference @@ -0,0 +1,21 @@ +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a 
Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sh new file mode 100755 index 00000000000..d4b6d1f4b63 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" + +$CH_CLIENT -q "drop table if exists test;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" + +$CH_CLIENT -q "system stop merges test" +$CH_CLIENT -q "insert into test select number, number from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference new file mode 100644 index 00000000000..4be740f6050 --- /dev/null +++ 
b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference @@ -0,0 +1,21 @@ +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sh new file mode 100755 index 00000000000..39671a297cf --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" + +$CH_CLIENT -q "drop table if exists test;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" + +$CH_CLIENT -q "system stop merges test" +$CH_CLIENT -q "insert into test select number, number from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by 
count(), type" + +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference new file mode 100644 index 00000000000..4be740f6050 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference @@ -0,0 +1,21 @@ +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sh new file mode 100755 index 00000000000..d58545c0b13 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" + +$CH_CLIENT -q "drop table if exists test;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" + +$CH_CLIENT -q "system stop merges test" +$CH_CLIENT -q "insert into test select number, number from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; 
optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference new file mode 100644 index 00000000000..4be740f6050 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference @@ -0,0 +1,21 @@ +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sh new file mode 100755 index 00000000000..39671a297cf --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" + +$CH_CLIENT -q "drop table if exists test;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" + +$CH_CLIENT -q "system stop merges test" +$CH_CLIENT -q "insert into test select number, number from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh index 7a73be20a4d..1f2a6a31ad7 100755 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_analyzer=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" function run() { diff --git a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference deleted file mode 100644 index 72749c905a3..00000000000 --- a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference +++ /dev/null @@ -1 +0,0 @@ -1 1 1 diff --git a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql deleted file mode 100644 index de471c1a091..00000000000 --- a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql +++ /dev/null @@ -1,17 +0,0 @@ --- https://github.com/ClickHouse/ClickHouse/issues/22627 -SET allow_experimental_analyzer=1; -WITH - x AS - ( - SELECT 1 AS a - ), - xx AS - ( - SELECT * - FROM x - , x AS x1 - , x AS x2 - ) -SELECT * -FROM xx -WHERE a = 1; diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.reference b/tests/queries/0_stateless/03130_generateSnowflakeId.reference index 39669d21bee..fd264f00d36 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.reference +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.reference @@ -1,5 +1,11 @@ +Negative tests +The first bit must be zero 1 +Test disabling of common subexpression elimination via first parameter 0 0 1 +Test user-provided machine ID +1 +Generated Snowflake IDs are unique 100 diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.sql b/tests/queries/0_stateless/03130_generateSnowflakeId.sql index 0717c81aa0d..6c0a6cc0f2c 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.sql +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.sql @@ -1,13 +1,22 @@ -- Test SQL function 'generateSnowflakeID' -SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; -- check first bit is zero +SELECT 'Negative tests'; +SELECT generateSnowflakeID(1, 2, 3); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT generateSnowflakeID(1, 'not_an_int'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT generateSnowflakeID(1, materialize(2)); -- { serverError ILLEGAL_COLUMN } + SELECT 'The first bit must be zero'; +SELECT bitAnd(bitShiftRight(generateSnowflakeID(), 63), 1) = 0; + +SELECT 'Test disabling of common subexpression elimination via first parameter'; SELECT generateSnowflakeID(1) = generateSnowflakeID(2); -- disabled common subexpression elimination --> lhs != rhs SELECT generateSnowflakeID() = generateSnowflakeID(1); -- same as ^^ -SELECT generateSnowflakeID(1) = generateSnowflakeID(1); -- enabled common subexpression elimination +SELECT generateSnowflakeID(1) = generateSnowflakeID(1); -- with common subexpression elimination -SELECT generateSnowflakeID(1, 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT 'Test user-provided machine ID'; +SELECT bitAnd(bitShiftRight(generateSnowflakeID(1, 123), 12), 1024 - 1) = 123; -- the machine id is actually set in the generated snowflake ID (1024 = 2^10) +SELECT 'Generated Snowflake IDs are unique'; SELECT count(*) FROM ( diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.reference b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference new file 
mode 100644 index 00000000000..2850a8aba98 --- /dev/null +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference @@ -0,0 +1,2 @@ +49999983751397 10000032 +49999983751397 10000032 diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql new file mode 100644 index 00000000000..17a67511030 --- /dev/null +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql @@ -0,0 +1,50 @@ +-- Tags: long + +DROP TABLE IF EXISTS build; +DROP TABLE IF EXISTS skewed_probe; + +SET session_timezone = 'UTC'; + +CREATE TABLE build ENGINE = MergeTree ORDER BY (key, begin) +AS + SELECT + toDateTime('1990-03-21 13:00:00') + INTERVAL number MINUTE AS begin, + number % 4 AS key, + number AS value + FROM numbers(0, 10000000); + +CREATE TABLE skewed_probe ENGINE = MergeTree ORDER BY (key, begin) +AS + SELECT + toDateTime('1990-04-21 13:00:01') + INTERVAL number MINUTE AS begin, + 0 AS key + FROM numbers(0, 5) + UNION ALL + SELECT + toDateTime('1990-05-21 13:00:01') + INTERVAL number MINUTE AS begin, + 1 AS key + FROM numbers(0, 10) + UNION ALL + SELECT + toDateTime('1990-06-21 13:00:01') + INTERVAL number MINUTE AS begin, + 2 AS key + FROM numbers(0, 20) + UNION ALL + SELECT + toDateTime('1990-03-21 13:00:01') + INTERVAL number MINUTE AS begin, + 3 AS key + FROM numbers(0, 10000000); + + +SELECT SUM(value), COUNT(*) +FROM skewed_probe +ASOF JOIN build +USING (key, begin) +; + +SELECT SUM(value), COUNT(*) +FROM skewed_probe +ASOF JOIN build +USING (key, begin) +SETTINGS join_algorithm = 'full_sorting_merge' +; diff --git a/tests/queries/0_stateless/03144_asof_join_ddb_doubles.reference b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.reference new file mode 100644 index 00000000000..f130f0a3f3b --- /dev/null +++ b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.reference @@ -0,0 +1,58 @@ +1 0 +2 0 +3 1 +4 1 +5 1 +6 2 +7 2 +8 3 +9 3 +0 0 +1 0 +2 0 +3 1 +4 1 +5 1 +6 2 +7 2 +8 3 +9 3 +1 1 0 +1 2 0 +1 3 1 +1 4 1 +1 5 1 +1 6 2 +1 7 2 +1 8 3 +1 9 3 +2 0 10 +2 1 10 +2 2 10 +2 3 10 +2 4 10 +2 5 10 +2 6 10 +2 7 20 +2 8 20 +2 9 20 +1 0 0 +1 1 0 +1 2 0 +1 3 1 +1 4 1 +1 5 1 +1 6 2 +1 7 2 +1 8 3 +1 9 3 +2 0 10 +2 1 10 +2 2 10 +2 3 10 +2 4 10 +2 5 10 +2 6 10 +2 7 20 +2 8 20 +2 9 20 diff --git a/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql new file mode 100644 index 00000000000..87aece14628 --- /dev/null +++ b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql @@ -0,0 +1,65 @@ +SET join_algorithm = 'full_sorting_merge'; +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS events0; + +CREATE TABLE events0 ( + begin Float64, + value Int32 +) ENGINE = MergeTree ORDER BY begin; + +INSERT INTO events0 VALUES (1.0, 0), (3.0, 1), (6.0, 2), (8.0, 3); + +SELECT p.ts, e.value +FROM + (SELECT number :: Float64 AS ts FROM numbers(10)) p +ASOF JOIN events0 e +ON p.ts >= e.begin +ORDER BY p.ts ASC; + +SELECT p.ts, e.value +FROM + (SELECT number :: Float64 AS ts FROM numbers(10)) p +ASOF LEFT JOIN events0 e +ON p.ts >= e.begin +ORDER BY p.ts ASC +-- SETTINGS join_use_nulls = 1 +; + +DROP TABLE IF EXISTS events0; + +DROP TABLE IF EXISTS events; +DROP TABLE IF EXISTS probes; + +CREATE TABLE events ( + key Int32, + begin Float64, + value Int32 +) ENGINE = MergeTree ORDER BY (key, begin); + +INSERT INTO events VALUES (1, 1.0, 0), (1, 3.0, 1), (1, 6.0, 2), (1, 8.0, 3), (2, 0.0, 10), (2, 7.0, 20), (2, 11.0, 30); + +CREATE TABLE probes ( + key Int32, + ts Float64 +) ENGINE = 
MergeTree ORDER BY (key, ts) AS +SELECT + key.number, + ts.number +FROM + numbers(1, 2) as key, + numbers(10) as ts +SETTINGS join_algorithm = 'hash'; + +SELECT p.key, p.ts, e.value +FROM probes p +ASOF JOIN events e +ON p.key = e.key AND p.ts >= e.begin +ORDER BY p.key, p.ts ASC; + +SELECT p.key, p.ts, e.value +FROM probes p +ASOF LEFT JOIN events e +ON p.key = e.key AND p.ts >= e.begin +ORDER BY p.key, p.ts ASC NULLS FIRST; + diff --git a/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh b/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh index c27dfffcfc2..b3d0b08948b 100755 --- a/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh +++ b/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh @@ -30,9 +30,9 @@ function alter_thread() while true; do REPLICA=$(($RANDOM % 3 + 1)) ADD=$(($RANDOM % 5 + 1)) - $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_steroids_$REPLICA ADD COLUMN value$ADD UInt32 DEFAULT 42 SETTINGS replication_alter_partitions_sync=0"; # additionaly we don't wait anything for more heavy concurrency + $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_steroids_$REPLICA ADD COLUMN value$ADD UInt32 DEFAULT 42 SETTINGS replication_alter_partitions_sync=0"; # additionally we don't wait anything for more heavy concurrency DROP=$(($RANDOM % 5 + 1)) - $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_steroids_$REPLICA DROP COLUMN value$DROP SETTINGS replication_alter_partitions_sync=0"; # additionaly we don't wait anything for more heavy concurrency + $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_steroids_$REPLICA DROP COLUMN value$DROP SETTINGS replication_alter_partitions_sync=0"; # additionally we don't wait anything for more heavy concurrency sleep 0.$RANDOM done } diff --git a/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference new file mode 100644 index 00000000000..4aac918c98c --- /dev/null +++ b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference @@ -0,0 +1,73 @@ +- +2023-03-21 12:00:00 1970-01-01 00:00:00 -1 +2023-03-21 13:00:00 1970-01-01 00:00:00 -1 +2023-03-21 14:00:00 2023-03-21 13:00:00 0 +2023-03-21 15:00:00 2023-03-21 14:00:00 1 +2023-03-21 16:00:00 2023-03-21 15:00:00 2 +2023-03-21 17:00:00 2023-03-21 16:00:00 3 +2023-03-21 18:00:00 2023-03-21 16:00:00 3 +2023-03-21 19:00:00 2023-03-21 16:00:00 3 +2023-03-21 20:00:00 2023-03-21 16:00:00 3 +2023-03-21 21:00:00 2023-03-21 16:00:00 3 +2027-10-18 11:03:27 2023-03-21 16:00:00 3 +- +2023-03-21 12:00:00 1970-01-01 00:00:00 -1 +2023-03-21 13:00:00 1970-01-01 00:00:00 -1 +2023-03-21 14:00:00 2023-03-21 13:00:00 0 +2023-03-21 15:00:00 2023-03-21 14:00:00 1 +2023-03-21 16:00:00 2023-03-21 15:00:00 2 +2023-03-21 17:00:00 2023-03-21 16:00:00 3 +2023-03-21 18:00:00 2023-03-21 16:00:00 3 +2023-03-21 19:00:00 2023-03-21 16:00:00 3 +2023-03-21 20:00:00 2023-03-21 16:00:00 3 +2023-03-21 21:00:00 2023-03-21 16:00:00 3 +2027-10-18 11:03:27 2023-03-21 16:00:00 3 +\N \N \N +2023-03-21 12:00:00 2023-03-21 13:00:00 0 +2023-03-21 13:00:00 2023-03-21 13:00:00 0 +2023-03-21 14:00:00 2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2023-03-21 15:00:00 2 +2023-03-21 16:00:00 2023-03-21 16:00:00 3 +2023-03-21 17:00:00 2027-10-18 11:03:27 9 +2023-03-21 18:00:00 2027-10-18 11:03:27 9 +2023-03-21 19:00:00 2027-10-18 11:03:27 9 
+2023-03-21 20:00:00 2027-10-18 11:03:27 9 +2023-03-21 21:00:00 2027-10-18 11:03:27 9 +2027-10-18 11:03:27 2027-10-18 11:03:27 9 +- +2023-03-21 12:00:00 2023-03-21 13:00:00 0 +2023-03-21 13:00:00 2023-03-21 13:00:00 0 +2023-03-21 14:00:00 2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2023-03-21 15:00:00 2 +2023-03-21 16:00:00 2023-03-21 16:00:00 3 +2023-03-21 17:00:00 2027-10-18 11:03:27 9 +2023-03-21 18:00:00 2027-10-18 11:03:27 9 +2023-03-21 19:00:00 2027-10-18 11:03:27 9 +2023-03-21 20:00:00 2027-10-18 11:03:27 9 +2023-03-21 21:00:00 2027-10-18 11:03:27 9 +2027-10-18 11:03:27 2027-10-18 11:03:27 9 +\N \N \N +- +2023-03-21 12:00:00 2023-03-21 13:00:00 0 +2023-03-21 13:00:00 2023-03-21 14:00:00 1 +2023-03-21 14:00:00 2023-03-21 15:00:00 2 +2023-03-21 15:00:00 2023-03-21 16:00:00 3 +2023-03-21 16:00:00 2027-10-18 11:03:27 9 +2023-03-21 17:00:00 2027-10-18 11:03:27 9 +2023-03-21 18:00:00 2027-10-18 11:03:27 9 +2023-03-21 19:00:00 2027-10-18 11:03:27 9 +2023-03-21 20:00:00 2027-10-18 11:03:27 9 +2023-03-21 21:00:00 2027-10-18 11:03:27 9 +- +2023-03-21 12:00:00 2023-03-21 13:00:00 0 +2023-03-21 13:00:00 2023-03-21 14:00:00 1 +2023-03-21 14:00:00 2023-03-21 15:00:00 2 +2023-03-21 15:00:00 2023-03-21 16:00:00 3 +2023-03-21 16:00:00 2027-10-18 11:03:27 9 +2023-03-21 17:00:00 2027-10-18 11:03:27 9 +2023-03-21 18:00:00 2027-10-18 11:03:27 9 +2023-03-21 19:00:00 2027-10-18 11:03:27 9 +2023-03-21 20:00:00 2027-10-18 11:03:27 9 +2023-03-21 21:00:00 2027-10-18 11:03:27 9 +2027-10-18 11:03:27 \N \N +\N \N \N diff --git a/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql new file mode 100644 index 00000000000..d67aa254bd6 --- /dev/null +++ b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql @@ -0,0 +1,66 @@ +DROP TABLE IF EXISTS events0; +DROP TABLE IF EXISTS probe0; + +SET allow_experimental_analyzer = 1; +SET join_algorithm = 'full_sorting_merge'; + +CREATE TABLE events0 ( + begin Nullable(DateTime('UTC')), + value Int32 +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO events0 SELECT toDateTime('2023-03-21 13:00:00', 'UTC') + INTERVAL number HOUR, number FROM numbers(4); +INSERT INTO events0 VALUES (NULL, -10),('0000-01-01 00:00:00', -1), ('9999-12-31 23:59:59', 9); + +CREATE TABLE probe0 ( + begin Nullable(DateTime('UTC')) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO probe0 SELECT toDateTime('2023-03-21 12:00:00', 'UTC') + INTERVAl number HOUR FROM numbers(10); +INSERT INTO probe0 VALUES (NULL),('9999-12-31 23:59:59'); + +SET join_use_nulls = 1; + +SELECT '-'; +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF JOIN events0 e +ON p.begin > e.begin +ORDER BY p.begin ASC; + +SELECT '-'; +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF LEFT JOIN events0 e +ON p.begin > e.begin +ORDER BY p.begin ASC; + +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF JOIN events0 e +ON p.begin <= e.begin +ORDER BY p.begin ASC; + +SELECT '-'; +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF LEFT JOIN events0 e +ON p.begin <= e.begin +ORDER BY p.begin ASC; + +SELECT '-'; +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF JOIN events0 e +ON p.begin < e.begin +ORDER BY p.begin ASC; + +SELECT '-'; +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF LEFT JOIN events0 e +ON p.begin < e.begin +ORDER BY p.begin ASC; + +DROP TABLE IF EXISTS events0; +DROP TABLE IF EXISTS probe0; diff --git a/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.reference 
b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.reference new file mode 100644 index 00000000000..ca481c7fff0 --- /dev/null +++ b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.reference @@ -0,0 +1,2 @@ +26790 1488 +26790 1488 diff --git a/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 new file mode 100644 index 00000000000..49ba70c471e --- /dev/null +++ b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 @@ -0,0 +1,39 @@ +-- Tags: long + +SET allow_experimental_analyzer=1; + +SET session_timezone = 'UTC'; + +{% for join_algorithm in ['default', 'full_sorting_merge'] -%} + +SET join_algorithm = '{{ join_algorithm }}'; + +-- TODO: enable once USING and `join_use_nulls` is supported by `full_sorting_merge` +-- SET join_use_nulls = 1; + +WITH build AS ( + SELECT + tk.number AS k, + toDateTime('2021-01-01 00:00:00') + INTERVAL i.number SECONDS AS t, + i.number % 37 AS v + FROM numbers(3000000) AS i + CROSS JOIN numbers(2) AS tk + SETTINGS join_algorithm = 'hash', join_use_nulls = 0 +), +probe AS ( + SELECT + tk.number AS k, + toDateTime('2021-01-01 00:00:30') + INTERVAL tt.number HOUR AS t + FROM numbers(2) AS tk + CROSS JOIN numbers(toUInt32((toDateTime('2021-02-01 00:00:30') - toDateTime('2021-01-01 00:00:30')) / 3600)) AS tt + SETTINGS join_algorithm = 'hash', join_use_nulls = 0 +) +SELECT + SUM(v) AS v, + COUNT(*) AS n +FROM probe +ASOF LEFT JOIN build +USING (k, t) +; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03147_asof_join_ddb_missing.reference b/tests/queries/0_stateless/03147_asof_join_ddb_missing.reference new file mode 100644 index 00000000000..11eb84463f4 --- /dev/null +++ b/tests/queries/0_stateless/03147_asof_join_ddb_missing.reference @@ -0,0 +1,10 @@ +108 +108 27 +513 +1218 +3528 +14553 +121275 +1495503 +12462525 +1249625025 diff --git a/tests/queries/0_stateless/03147_asof_join_ddb_missing.sql b/tests/queries/0_stateless/03147_asof_join_ddb_missing.sql new file mode 100644 index 00000000000..95a5f8ab3ff --- /dev/null +++ b/tests/queries/0_stateless/03147_asof_join_ddb_missing.sql @@ -0,0 +1,186 @@ +SET allow_experimental_analyzer=1; + +SET session_timezone = 'UTC'; +SET joined_subquery_requires_alias = 0; +SET allow_experimental_analyzer = 1; +SET join_algorithm = 'full_sorting_merge'; + +-- # 10 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(10), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # Coverage: Missing right side bin +WITH build AS ( + SELECT + k * 2 AS k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(10), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + intDiv(k, 2) AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v), COUNT(*) +FROM probe ASOF JOIN build USING (k, t); + +-- # 20 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(20), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 30 dates, 5 
keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(30), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 50 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(50), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 100 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(100), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 100 dates, 50 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(100), (SELECT number AS k FROM numbers(50)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 1000 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(1000), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 1000 dates, 50 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(1000), (SELECT number AS k FROM numbers(50)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 10000 dates, 50 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(10000), (SELECT number AS k FROM numbers(50)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); diff --git a/tests/queries/0_stateless/03148_asof_join_ddb_subquery.reference b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.reference new file mode 100644 index 00000000000..387a4a8f249 --- /dev/null +++ b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.reference @@ -0,0 +1,4 @@ +1 1 +3 1 +6 1 +8 1 diff --git a/tests/queries/0_stateless/03148_asof_join_ddb_subquery.sql b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.sql new file mode 100644 index 00000000000..2ddf0f09b1e --- /dev/null +++ b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS events; +CREATE TABLE events (begin Float64, value Int32) ENGINE = MergeTree() ORDER BY begin; + +INSERT INTO events VALUES (1, 0), (3, 1), (6, 2), (8, 3); + +SET allow_experimental_analyzer = 1; +SET join_algorithm = 'full_sorting_merge'; +SET joined_subquery_requires_alias = 0; + +SELECT + begin, + value IN 
( + SELECT e1.value + FROM ( + SELECT * + FROM events e1 + WHERE e1.value = events.value + ) AS e1 + ASOF JOIN ( + SELECT number :: Float64 AS begin + FROM numbers(10) + WHERE number >= 1 AND number < 10 + ) + USING (begin) + ) +FROM events +ORDER BY begin ASC; + +DROP TABLE IF EXISTS events; diff --git a/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.sh b/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.sh index 9c290133bf9..2b4b96a9cbf 100755 --- a/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.sh +++ b/tests/queries/0_stateless/03148_async_queries_in_query_log_errors.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-parallel CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.reference b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.reference new file mode 100644 index 00000000000..7cfc85d23a5 --- /dev/null +++ b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.reference @@ -0,0 +1,56 @@ +2023-03-21 13:00:00 0 +2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2 +2023-03-21 16:00:00 3 +2023-03-21 17:00:00 3 +2023-03-21 18:00:00 3 +2023-03-21 19:00:00 3 +2023-03-21 20:00:00 3 +2023-03-21 21:00:00 3 +2106-02-07 06:28:15 9 +2023-03-21 13:00:00 0 +2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2 +2023-03-21 16:00:00 3 +2023-03-21 17:00:00 3 +2023-03-21 18:00:00 3 +2023-03-21 19:00:00 3 +2023-03-21 20:00:00 3 +2023-03-21 21:00:00 3 +2106-02-07 06:28:15 9 +2023-03-21 12:00:00 \N +2023-03-21 13:00:00 0 +2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2 +2023-03-21 16:00:00 3 +2023-03-21 17:00:00 3 +2023-03-21 18:00:00 3 +2023-03-21 19:00:00 3 +2023-03-21 20:00:00 3 +2023-03-21 21:00:00 3 +2106-02-07 06:28:15 9 +\N \N +2023-03-21 12:00:00 0 +2023-03-21 13:00:00 0 +2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2 +2023-03-21 16:00:00 3 +2023-03-21 17:00:00 3 +2023-03-21 18:00:00 3 +2023-03-21 19:00:00 3 +2023-03-21 20:00:00 3 +2023-03-21 21:00:00 3 +2106-02-07 06:28:15 9 +\N 0 +2023-03-21 12:00:00 \N +2023-03-21 13:00:00 \N +2023-03-21 14:00:00 \N +2023-03-21 15:00:00 \N +2023-03-21 16:00:00 \N +2023-03-21 17:00:00 \N +2023-03-21 18:00:00 \N +2023-03-21 19:00:00 \N +2023-03-21 20:00:00 \N +2023-03-21 21:00:00 \N +2106-02-07 06:28:15 \N +\N \N diff --git a/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql new file mode 100644 index 00000000000..cd83d62dc70 --- /dev/null +++ b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql @@ -0,0 +1,95 @@ +DROP TABLE IF EXISTS events0; +DROP TABLE IF EXISTS probe0; + +SET session_timezone = 'UTC'; +SET allow_experimental_analyzer = 1; +SET join_algorithm = 'full_sorting_merge'; +SET join_use_nulls = 1; + +CREATE TABLE events0 +ENGINE = MergeTree() +ORDER BY COALESCE(begin, toDateTime('9999-12-31 23:59:59')) +AS +SELECT + toNullable(toDateTime('2023-03-21 13:00:00') + INTERVAL number HOUR) AS begin, + number AS value +FROM numbers(4); + +INSERT INTO events0 VALUES (NULL, -1), (toDateTime('9999-12-31 23:59:59'), 9); + +CREATE TABLE probe0 +ENGINE = MergeTree() +ORDER BY COALESCE(begin, toDateTime('9999-12-31 23:59:59')) +AS +SELECT + toNullable(toDateTime('2023-03-21 12:00:00') + INTERVAL number HOUR) AS begin +FROM numbers(10); + +INSERT INTO probe0 VALUES (NULL), (toDateTime('9999-12-31 23:59:59')); + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF JOIN events0 e ON p.begin >= e.begin +ORDER BY p.begin ASC; + 
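+-- The remaining queries repeat the same probe/build pair with USING syntax (join_use_nulls = 0), with ASOF LEFT JOIN, with ASOF RIGHT JOIN (expected to fail with NOT_IMPLEMENTED), and with an ASOF LEFT JOIN against a filtered subquery.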
+SELECT + p.begin, + e.value +FROM + probe0 p + ASOF JOIN events0 e USING (begin) +ORDER BY p.begin ASC +SETTINGS join_use_nulls = 0 +; + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF LEFT JOIN events0 e ON p.begin >= e.begin +ORDER BY p.begin ASC; + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF LEFT JOIN events0 e USING (begin) +ORDER BY p.begin ASC +SETTINGS join_use_nulls = 0 +; + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF RIGHT JOIN events0 e ON p.begin >= e.begin +ORDER BY e.begin ASC; -- { serverError NOT_IMPLEMENTED} + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF RIGHT JOIN events0 e USING (begin) +ORDER BY e.begin ASC; -- { serverError NOT_IMPLEMENTED} + + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF LEFT JOIN ( + SELECT * FROM events0 WHERE log(value + 5) > 10 + ) e ON p.begin + INTERVAL 2 HOUR >= e.begin + INTERVAL 1 HOUR +ORDER BY p.begin ASC; + + +DROP TABLE IF EXISTS events0; +DROP TABLE IF EXISTS probe0; diff --git a/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql b/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql index 652b27b8a67..ac2070fbd76 100644 --- a/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql +++ b/tests/queries/0_stateless/03164_s3_settings_for_queries_and_merges.sql @@ -1,4 +1,4 @@ --- Tags: no-random-settings, no-fasttest +-- Tags: no-random-settings, no-fasttest, no-parallel SET allow_prefetched_read_pool_for_remote_filesystem=0; SET allow_prefetched_read_pool_for_local_filesystem=0; diff --git a/tests/queries/0_stateless/03172_system_detached_tables.reference b/tests/queries/0_stateless/03172_system_detached_tables.reference new file mode 100644 index 00000000000..83d1ff13942 --- /dev/null +++ b/tests/queries/0_stateless/03172_system_detached_tables.reference @@ -0,0 +1,11 @@ +database atomic tests +test03172_system_detached_tables test_table 0 +test03172_system_detached_tables test_table_perm 1 +test03172_system_detached_tables test_table 0 +test03172_system_detached_tables test_table_perm 1 +test03172_system_detached_tables test_table 0 +----------------------- +database lazy tests +before attach test03172_system_detached_tables_lazy test_table 0 +before attach test03172_system_detached_tables_lazy test_table_perm 1 +DROP TABLE diff --git a/tests/queries/0_stateless/03172_system_detached_tables.sql b/tests/queries/0_stateless/03172_system_detached_tables.sql new file mode 100644 index 00000000000..1a3c2d7cc0f --- /dev/null +++ b/tests/queries/0_stateless/03172_system_detached_tables.sql @@ -0,0 +1,53 @@ +-- Tags: no-parallel + +SELECT 'database atomic tests'; +DROP DATABASE IF EXISTS test03172_system_detached_tables; +CREATE DATABASE IF NOT EXISTS test03172_system_detached_tables ENGINE=Atomic; + +CREATE TABLE test03172_system_detached_tables.test_table (n Int64) ENGINE=MergeTree ORDER BY n; +SELECT * FROM system.detached_tables WHERE database='test03172_system_detached_tables'; + +DETACH TABLE test03172_system_detached_tables.test_table; +SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables'; + +ATTACH TABLE test03172_system_detached_tables.test_table; + +CREATE TABLE test03172_system_detached_tables.test_table_perm (n Int64) ENGINE=MergeTree ORDER BY n; +SELECT * FROM system.detached_tables WHERE database='test03172_system_detached_tables'; + +DETACH TABLE test03172_system_detached_tables.test_table_perm PERMANENTLY; +SELECT database, table, is_permanently FROM system.detached_tables 
WHERE database='test03172_system_detached_tables'; + +DETACH TABLE test03172_system_detached_tables.test_table SYNC; +SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables'; + +SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables' AND table='test_table'; + +DROP DATABASE test03172_system_detached_tables SYNC; + +SELECT '-----------------------'; +SELECT 'database lazy tests'; + +DROP DATABASE IF EXISTS test03172_system_detached_tables_lazy; +CREATE DATABASE test03172_system_detached_tables_lazy Engine=Lazy(10); + +CREATE TABLE test03172_system_detached_tables_lazy.test_table (number UInt64) engine=Log; +INSERT INTO test03172_system_detached_tables_lazy.test_table SELECT * FROM numbers(100); +DETACH TABLE test03172_system_detached_tables_lazy.test_table; + +CREATE TABLE test03172_system_detached_tables_lazy.test_table_perm (number UInt64) engine=Log; +INSERT INTO test03172_system_detached_tables_lazy.test_table_perm SELECT * FROM numbers(100); +DETACH table test03172_system_detached_tables_lazy.test_table_perm PERMANENTLY; + +SELECT 'before attach', database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables_lazy'; + +ATTACH TABLE test03172_system_detached_tables_lazy.test_table; +ATTACH TABLE test03172_system_detached_tables_lazy.test_table_perm; + +SELECT 'after attach', database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables_lazy'; + +SELECT 'DROP TABLE'; +DROP TABLE test03172_system_detached_tables_lazy.test_table SYNC; +DROP TABLE test03172_system_detached_tables_lazy.test_table_perm SYNC; + +DROP DATABASE test03172_system_detached_tables_lazy SYNC; diff --git a/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.reference b/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.reference new file mode 100644 index 00000000000..1508c24f410 --- /dev/null +++ b/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.reference @@ -0,0 +1,55 @@ +-- { echoOn } +SELECT concatWithSeparator('.', toUInt128(6), '666' as b, materialize(toLowCardinality(8))) +FROM system.one +GROUP BY '666'; +6.666.8 +SELECT concatWithSeparator('.', toUInt128(6), '666' as b, materialize(toLowCardinality(8))) +FROM remote('127.0.0.{1,1}', 'system.one') +GROUP BY '666'; +6.666.8 +-- https://github.com/ClickHouse/ClickHouse/issues/63006 +SELECT + 6, + concat(' World', toUInt128(6), 6, 6, 6, toNullable(6), materialize(toLowCardinality(toNullable(toUInt128(6))))) AS a, + concat(concat(' World', 6, toLowCardinality(6), ' World', toUInt256(6), materialize(6), 6, toNullable(6), 6, 6, NULL, 6, 6), ' World', 6, 6, 6, 6, toUInt256(6), NULL, 6, 6) AS b +FROM system.one +GROUP BY toNullable(6) + WITH ROLLUP +WITH TOTALS; +6 World666666 \N +6 World666666 \N + +6 World666666 \N +SELECT + 6, + concat(' World', toUInt128(6), 6, 6, 6, toNullable(6), materialize(toLowCardinality(toNullable(toUInt128(6))))) AS a, + concat(concat(' World', 6, toLowCardinality(6), ' World', toUInt256(6), materialize(6), 6, toNullable(6), 6, 6, NULL, 6, 6), ' World', 6, 6, 6, 6, toUInt256(6), NULL, 6, 6) AS b +FROM remote('127.0.0.1') +GROUP BY toNullable(6) + WITH ROLLUP + WITH TOTALS; +6 World666666 \N +6 World666666 \N + +6 World666666 \N +-- { echoOn } +SELECT + '%', + tuple(concat('%', 1, toLowCardinality(toLowCardinality(toNullable(materialize(1)))), 
currentDatabase(), 101., toNullable(13), '%AS%id_02%', toNullable(toNullable(10)), toLowCardinality(toNullable(10)), 10, 10)), + (toDecimal128(99.67, 6), 36, 61, 14) +FROM dist_03174 +WHERE dummy IN (0, '255') +GROUP BY + toNullable(13), + (99.67, 61, toLowCardinality(14)); +% ('%11default10113%AS%id_02%10101010') (99.67,36,61,14) +-- { echoOn } +SELECT + 38, + concat(position(concat(concat(position(concat(toUInt256(3)), 'ca', 2), 3), NULLIF(1, materialize(toLowCardinality(1)))), toLowCardinality(toNullable('ca'))), concat(NULLIF(1, 1), concat(3), toNullable(3))) +FROM set_index_not__fuzz_0 +GROUP BY + toNullable(3), + concat(concat(CAST(NULL, 'Nullable(Int8)'), toNullable(3))) +FORMAT Null +SETTINGS max_threads = 1, allow_experimental_analyzer = 1, cluster_for_parallel_replicas = 'parallel_replicas', max_parallel_replicas = 3, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_threads = 1; diff --git a/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.sql b/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.sql new file mode 100644 index 00000000000..d397d30e285 --- /dev/null +++ b/tests/queries/0_stateless/03174_low_cardinality_group_by_distributed_plan.sql @@ -0,0 +1,80 @@ +-- There are various tests that check that group by keys don't propagate into functions replacing const arguments +-- by full (empty) columns + +DROP TABLE IF EXISTS dist_03174; +DROP TABLE IF EXISTS set_index_not__fuzz_0; + +-- https://github.com/ClickHouse/ClickHouse/issues/63006 + +SET allow_experimental_analyzer=1; + +-- { echoOn } +SELECT concatWithSeparator('.', toUInt128(6), '666' as b, materialize(toLowCardinality(8))) +FROM system.one +GROUP BY '666'; + +SELECT concatWithSeparator('.', toUInt128(6), '666' as b, materialize(toLowCardinality(8))) +FROM remote('127.0.0.{1,1}', 'system.one') +GROUP BY '666'; + +-- https://github.com/ClickHouse/ClickHouse/issues/63006 +SELECT + 6, + concat(' World', toUInt128(6), 6, 6, 6, toNullable(6), materialize(toLowCardinality(toNullable(toUInt128(6))))) AS a, + concat(concat(' World', 6, toLowCardinality(6), ' World', toUInt256(6), materialize(6), 6, toNullable(6), 6, 6, NULL, 6, 6), ' World', 6, 6, 6, 6, toUInt256(6), NULL, 6, 6) AS b +FROM system.one +GROUP BY toNullable(6) + WITH ROLLUP +WITH TOTALS; + +SELECT + 6, + concat(' World', toUInt128(6), 6, 6, 6, toNullable(6), materialize(toLowCardinality(toNullable(toUInt128(6))))) AS a, + concat(concat(' World', 6, toLowCardinality(6), ' World', toUInt256(6), materialize(6), 6, toNullable(6), 6, 6, NULL, 6, 6), ' World', 6, 6, 6, 6, toUInt256(6), NULL, 6, 6) AS b +FROM remote('127.0.0.1') +GROUP BY toNullable(6) + WITH ROLLUP + WITH TOTALS; + +-- https://github.com/ClickHouse/ClickHouse/issues/64945 +-- { echoOff } +CREATE TABLE dist_03174 AS system.one ENGINE = Distributed(test_cluster_two_shards, system, one, dummy); + +-- { echoOn } +SELECT + '%', + tuple(concat('%', 1, toLowCardinality(toLowCardinality(toNullable(materialize(1)))), currentDatabase(), 101., toNullable(13), '%AS%id_02%', toNullable(toNullable(10)), toLowCardinality(toNullable(10)), 10, 10)), + (toDecimal128(99.67, 6), 36, 61, 14) +FROM dist_03174 +WHERE dummy IN (0, '255') +GROUP BY + toNullable(13), + (99.67, 61, toLowCardinality(14)); + +-- Parallel replicas +-- { echoOff } +CREATE TABLE set_index_not__fuzz_0 +( + `name` String, + `status` Enum8('alive' = 0, 'rip' = 1), + INDEX idx_status status TYPE set(2) GRANULARITY 1 +) +ENGINE = MergeTree() 
+ORDER BY name; + +INSERT INTO set_index_not__fuzz_0 SELECT * FROM generateRandom() LIMIT 10; + +-- { echoOn } +SELECT + 38, + concat(position(concat(concat(position(concat(toUInt256(3)), 'ca', 2), 3), NULLIF(1, materialize(toLowCardinality(1)))), toLowCardinality(toNullable('ca'))), concat(NULLIF(1, 1), concat(3), toNullable(3))) +FROM set_index_not__fuzz_0 +GROUP BY + toNullable(3), + concat(concat(CAST(NULL, 'Nullable(Int8)'), toNullable(3))) +FORMAT Null +SETTINGS max_threads = 1, allow_experimental_analyzer = 1, cluster_for_parallel_replicas = 'parallel_replicas', max_parallel_replicas = 3, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_threads = 1; + +-- { echoOff } +DROP TABLE IF EXISTS dist_03174; +DROP TABLE IF EXISTS set_index_not__fuzz_0; diff --git a/tests/queries/0_stateless/03198_dictionary_validate_primary_key_type.reference b/tests/queries/0_stateless/03198_dictionary_validate_primary_key_type.reference new file mode 100644 index 00000000000..c7f2781aed9 --- /dev/null +++ b/tests/queries/0_stateless/03198_dictionary_validate_primary_key_type.reference @@ -0,0 +1,4 @@ +n1 UInt64 +n2 UInt32 +n1 UInt64 +n2 UInt32 diff --git a/tests/queries/0_stateless/03198_dictionary_validate_primary_key_type.sql b/tests/queries/0_stateless/03198_dictionary_validate_primary_key_type.sql new file mode 100644 index 00000000000..e7b8730abbd --- /dev/null +++ b/tests/queries/0_stateless/03198_dictionary_validate_primary_key_type.sql @@ -0,0 +1,41 @@ +CREATE DICTIONARY `test_dictionary0` ( + `n1` String, + `n2` UInt32 +) +PRIMARY KEY n1 +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 DB 'test_db' TABLE 'table_01' USER 'default')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); + +SET dictionary_validate_primary_key_type=1; + +CREATE DICTIONARY `test_dictionary1` ( + `n1` String, + `n2` UInt32 +) +PRIMARY KEY n1 +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 DB 'test_db' TABLE 'table_01' USER 'default')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); -- { serverError 36 } + +CREATE DICTIONARY `test_dictionary2` ( + `n1` UInt32, + `n2` UInt32 +) +PRIMARY KEY n1 +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 DB 'test_db' TABLE 'table_01' USER 'default')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); -- { serverError 36 } + +CREATE DICTIONARY `test_dictionary3` ( + `n1` UInt64, + `n2` UInt32 +) +PRIMARY KEY n1 +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 DB 'test_db' TABLE 'table_01' USER 'default')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); + +DESCRIBE `test_dictionary0`; +DESCRIBE `test_dictionary3`; + diff --git a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference new file mode 100644 index 00000000000..f9fe0c2ad77 --- /dev/null +++ b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.reference @@ -0,0 +1,17 @@ +QUERY id: 0 + PROJECTION COLUMNS + d.String Nullable(String) + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: d.String, result_type: Nullable(String), source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.test_dynamic + SETTINGS allow_experimental_analyzer=1 +foo +\N +\N +foo +\N +\N +6 +6 diff --git a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql new file mode 100644 index 00000000000..bbe701f022b --- /dev/null +++ b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql @@ -0,0 +1,22 @@ +-- Tags: no-random-settings, no-object-storage, 
no-parallel +-- Tag no-object-storage: this test relies on the number of opened files in MergeTree that can differ in object storages + +SET allow_experimental_dynamic_type = 1; +DROP TABLE IF EXISTS test_dynamic; +CREATE TABLE test_dynamic (id UInt64, d Dynamic) ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; +INSERT INTO test_dynamic VALUES (1, 'foo'), (2, 1111), (3, [1, 2, 3]); +EXPLAIN QUERY TREE SELECT d.String FROM test_dynamic SETTINGS allow_experimental_analyzer = 1; +SYSTEM DROP MARK CACHE; +SELECT d.String FROM test_dynamic SETTINGS allow_experimental_analyzer = 1; +SYSTEM DROP MARK CACHE; +SELECT d.String FROM test_dynamic SETTINGS allow_experimental_analyzer = 0; +SYSTEM FLUSH LOGS; +SELECT + ProfileEvents['FileOpen'] +FROM system.query_log +WHERE (type = 2) AND (query LIKE 'SELECT d.String %test_dynamic%') AND (current_database = currentDatabase()) +ORDER BY event_time_microseconds DESC +LIMIT 2; + +DROP TABLE test_dynamic; + diff --git a/tests/queries/0_stateless/03198_group_array_intersect.reference b/tests/queries/0_stateless/03198_group_array_intersect.reference new file mode 100644 index 00000000000..6a3ffdba388 --- /dev/null +++ b/tests/queries/0_stateless/03198_group_array_intersect.reference @@ -0,0 +1,4 @@ +['0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','5\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0'] +['1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','5\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','6\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0'] +['1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','5\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0'] +['1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0','5\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0'] diff --git a/tests/queries/0_stateless/03198_group_array_intersect.sql b/tests/queries/0_stateless/03198_group_array_intersect.sql new file mode 100644 index 00000000000..ab7ba0b0d70 --- /dev/null +++ b/tests/queries/0_stateless/03198_group_array_intersect.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS test_numbers__fuzz_29; +SET max_threads=1, max_insert_threads=1; +CREATE TABLE test_numbers__fuzz_29 (`a` Array(Nullable(FixedString(19)))) ENGINE = MergeTree ORDER BY a SETTINGS allow_nullable_key=1; + +INSERT INTO test_numbers__fuzz_29 VALUES ([1,2,3,4,5,6]); +INSERT INTO test_numbers__fuzz_29 VALUES ([1,2,4,5]); +INSERT INTO test_numbers__fuzz_29 VALUES ([1,4,3,0,5,5,5]); + +SELECT arraySort(groupArrayIntersect(*)) FROM test_numbers__fuzz_29 GROUP BY a WITH ROLLUP ORDER BY ALL; + +DROP TABLE test_numbers__fuzz_29; diff --git a/tests/queries/0_stateless/03199_dictionary_table_access.reference b/tests/queries/0_stateless/03199_dictionary_table_access.reference new file mode 100644 index 00000000000..4a703b3be84 --- /dev/null +++ b/tests/queries/0_stateless/03199_dictionary_table_access.reference @@ -0,0 +1,2 @@ +ACCESS_DENIED +ACCESS_DENIED diff --git a/tests/queries/0_stateless/03199_dictionary_table_access.sh b/tests/queries/0_stateless/03199_dictionary_table_access.sh new file mode 100755 index 00000000000..952b466b5da --- /dev/null +++ b/tests/queries/0_stateless/03199_dictionary_table_access.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh 
+. "$CUR_DIR"/../shell_config.sh + +username="user_${CLICKHOUSE_TEST_UNIQUE_NAME}" +dictname="dict_${CLICKHOUSE_TEST_UNIQUE_NAME}" +dicttablename="dict_table_${CLICKHOUSE_TEST_UNIQUE_NAME}" + +${CLICKHOUSE_CLIENT} -nm --query " + CREATE DICTIONARY IF NOT EXISTS ${dictname} + ( + id UInt64, + value UInt64 + ) + PRIMARY KEY id + SOURCE(NULL()) + LAYOUT(FLAT()) + LIFETIME(MIN 0 MAX 1000); + CREATE USER IF NOT EXISTS ${username} NOT IDENTIFIED; + GRANT SELECT, CREATE TEMPORARY TABLE ON *.* to ${username}; + SELECT * FROM ${dictname}; + CREATE TABLE ${dicttablename} (id UInt64, value UInt64) + ENGINE = Dictionary(${CLICKHOUSE_DATABASE}.${dictname}); + SELECT * FROM ${dicttablename}; +" + +$CLICKHOUSE_CLIENT -nm --user="${username}" --query " + SELECT * FROM ${dictname}; +" 2>&1 | grep -o ACCESS_DENIED | uniq + +$CLICKHOUSE_CLIENT -nm --user="${username}" --query " + SELECT * FROM ${dicttablename}; +" 2>&1 | grep -o ACCESS_DENIED | uniq + +${CLICKHOUSE_CLIENT} -nm --query " + DROP TABLE IF EXISTS ${dicttablename} SYNC; + DROP DICTIONARY IF EXISTS ${dictname}; + DROP USER IF EXISTS ${username}; +" diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh index aa06e48376c..968c9e5271f 100755 --- a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --optimize_functions_to_subcolumns=0" function test() diff --git a/tests/queries/0_stateless/03203_client_benchmark_options.reference b/tests/queries/0_stateless/03203_client_benchmark_options.reference new file mode 100644 index 00000000000..fd2996b1c78 --- /dev/null +++ b/tests/queries/0_stateless/03203_client_benchmark_options.reference @@ -0,0 +1,6 @@ +Ok +Ok +Ok +Ok +Ok +Ok diff --git a/tests/queries/0_stateless/03203_client_benchmark_options.sh b/tests/queries/0_stateless/03203_client_benchmark_options.sh new file mode 100755 index 00000000000..a9b9d69822b --- /dev/null +++ b/tests/queries/0_stateless/03203_client_benchmark_options.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -t -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1 | grep -q "^2\." && echo "Ok" || echo "Fail" +${CLICKHOUSE_CLIENT} --time -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1 | grep -q "^2\." 
&& echo "Ok" || echo "Fail" +${CLICKHOUSE_CLIENT} --memory-usage -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "^[0-9]\+$" && echo "Ok" || echo "Fail" +${CLICKHOUSE_CLIENT} --memory-usage=none -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" # expected no output +${CLICKHOUSE_CLIENT} --memory-usage=default -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "^[0-9]\+$" && echo "Ok" || echo "Fail" +${CLICKHOUSE_CLIENT} --memory-usage=readable -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "^[0-9].*B$" && echo "Ok" || echo "Fail" +${CLICKHOUSE_CLIENT} --memory-usage=unknown -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "BAD_ARGUMENTS" && echo "Ok" || echo "Fail" diff --git a/tests/queries/0_stateless/01487_distributed_in_not_default_db.reference b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference similarity index 50% rename from tests/queries/0_stateless/01487_distributed_in_not_default_db.reference rename to tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference index 0d66ea1aee9..6ed281c757a 100644 --- a/tests/queries/0_stateless/01487_distributed_in_not_default_db.reference +++ b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference @@ -1,2 +1,2 @@ -0 +1 1 diff --git a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql new file mode 100644 index 00000000000..bb3269da597 --- /dev/null +++ b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql @@ -0,0 +1,4 @@ +CREATE TABLE t (p UInt8, x UInt64) Engine = MergeTree PARTITION BY p ORDER BY x; +INSERT INTO t SELECT 0, number FROM numbers(10) SETTINGS max_block_size = 100; +SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 0; +SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/03203_multiif_and_where_2_conditions_old_analyzer_bug.reference b/tests/queries/0_stateless/03203_multiif_and_where_2_conditions_old_analyzer_bug.reference new file mode 100644 index 00000000000..5512e59cc73 --- /dev/null +++ b/tests/queries/0_stateless/03203_multiif_and_where_2_conditions_old_analyzer_bug.reference @@ -0,0 +1,6 @@ +this query used to be broken in old analyser: +c1 c2 yes true +this query worked: +yes true +experimental analyzer: +c1 c2 yes true diff --git a/tests/queries/0_stateless/03203_multiif_and_where_2_conditions_old_analyzer_bug.sql b/tests/queries/0_stateless/03203_multiif_and_where_2_conditions_old_analyzer_bug.sql new file mode 100644 index 00000000000..c97333f7df4 --- /dev/null +++ b/tests/queries/0_stateless/03203_multiif_and_where_2_conditions_old_analyzer_bug.sql @@ -0,0 +1,35 @@ +DROP TABLE IF EXISTS bugcheck1; + +CREATE TABLE bugcheck1 +ENGINE = MergeTree +ORDER BY tuple() +AS SELECT + 'c1' as column_a, + 'c2' as column_b; + +select 'this query used to be broken in old analyser:'; +SELECT *, + multiIf(column_b IN (SELECT 'c2' as someproduct), 'yes', 'no') AS condition_1, + multiIf(column_b = 'c2', 'true', 'false') AS condition_2 +FROM (SELECT column_a, column_b FROM bugcheck1) +WHERE (condition_1 IN ('yes')) AND (condition_2 in ('true')) +settings allow_experimental_analyzer=0; + +select 'this query worked:'; + +SELECT + multiIf(column_b IN (SELECT 'c2' as someproduct), 'yes', 'no') AS 
condition_1, + multiIf(column_b = 'c2', 'true', 'false') AS condition_2 +FROM (SELECT column_a, column_b FROM bugcheck1) +WHERE (condition_1 IN ('yes')) AND (condition_2 in ('true')) +settings allow_experimental_analyzer=0; + +select 'experimental analyzer:'; +SELECT *, + multiIf(column_b IN (SELECT 'c2' as someproduct), 'yes', 'no') AS condition_1, + multiIf(column_b = 'c2', 'true', 'false') AS condition_2 +FROM (SELECT column_a, column_b FROM bugcheck1) +WHERE (condition_1 IN ('yes')) AND (condition_2 in ('true')) +settings allow_experimental_analyzer=1; + +DROP TABLE bugcheck1; diff --git a/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.reference b/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.reference new file mode 100644 index 00000000000..353c8f98b03 --- /dev/null +++ b/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.reference @@ -0,0 +1,2 @@ +2020-01-01 +2020-01-02 diff --git a/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.sql b/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.sql new file mode 100644 index 00000000000..f9ba28bcd60 --- /dev/null +++ b/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.sql @@ -0,0 +1,12 @@ +SET allow_experimental_analyzer=1; +CREATE TABLE foo (i Date) ENGINE MergeTree ORDER BY i; +INSERT INTO foo VALUES ('2020-01-01'); +INSERT INTO foo VALUES ('2020-01-02'); + +SET optimize_min_equality_disjunction_chain_length = 3; +SELECT * +FROM foo +WHERE (foo.i = parseDateTimeBestEffort('2020-01-01')) + OR (foo.i = parseDateTimeBestEffort('2020-01-02')) + OR (foo.i = parseDateTimeBestEffort('2020-01-03')) +ORDER BY foo.i ASC diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.reference new file mode 100644 index 00000000000..d38b21d2d05 --- /dev/null +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.reference @@ -0,0 +1,14 @@ +18679 31 +0 +10 +20 +30 +40 +50 +60 +70 +80 +90 +100 +110 +4250 diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql new file mode 100644 index 00000000000..756e08da27d --- /dev/null +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql @@ -0,0 +1,38 @@ +--- The following query was buggy before, so let's use it as a test case +WITH + (num > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < num) AND ((num % y) = 0)), range(toUInt64(sqrt(num)) + 1))) = 0) AS is_prime_slow +SELECT + num, + ds, +FROM +( + WITH + arraySum(arrayMap(y -> toUInt8(y), splitByString('', toString(num)))) AS digits_sum + SELECT + 1 + (number * 2) AS num, + digits_sum AS ds + FROM numbers_mt(10000) + WHERE ds IN ( + WITH + (number > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < number) AND ((number % y) = 0)), range(toUInt64(sqrt(number)) + 1))) = 0) AS is_prime_slow + SELECT number + FROM numbers(180 + 1) + WHERE is_prime_slow + ) +) +WHERE is_prime_slow +ORDER BY num ASC +LIMIT 998, 1 +SETTINGS max_block_size = 64, max_threads=16; + +SELECT number +FROM numbers_mt(120) +WHERE (number % 10) = 0 +ORDER BY number ASC +SETTINGS max_block_size = 31, max_threads = 11; + +SELECT number +FROM numbers_mt(4242, 9) +WHERE (number % 10) = 0 +ORDER BY number ASC +SETTINGS max_block_size = 31, max_threads = 11; diff --git 
a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference new file mode 100644 index 00000000000..6aad3ee0b4b --- /dev/null +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference @@ -0,0 +1,6 @@ +case 1 +9900 +9910 +9920 +case 2 +9990 diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql new file mode 100644 index 00000000000..df01ddf4312 --- /dev/null +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql @@ -0,0 +1,11 @@ +SELECT 'case 1'; +SELECT number FROM numbers_mt(10000) +WHERE (number % 10) = 0 +ORDER BY number ASC +LIMIT 990, 3; + +SELECT 'case 2'; +SELECT number FROM numbers_mt(10000) +WHERE (number % 10) = 0 +ORDER BY number ASC +LIMIT 999, 20 SETTINGS max_block_size = 31; diff --git a/tests/queries/0_stateless/03204_index_hint_fuzzer.reference b/tests/queries/0_stateless/03204_index_hint_fuzzer.reference new file mode 100644 index 00000000000..f750ba35102 --- /dev/null +++ b/tests/queries/0_stateless/03204_index_hint_fuzzer.reference @@ -0,0 +1 @@ +(1,1) diff --git a/tests/queries/0_stateless/03204_index_hint_fuzzer.sql b/tests/queries/0_stateless/03204_index_hint_fuzzer.sql new file mode 100644 index 00000000000..5794f3eee53 --- /dev/null +++ b/tests/queries/0_stateless/03204_index_hint_fuzzer.sql @@ -0,0 +1 @@ +SELECT tuple(indexHint(toLowCardinality('aaa')), 1); diff --git a/tests/queries/0_stateless/03205_column_type_check.reference b/tests/queries/0_stateless/03205_column_type_check.reference new file mode 100644 index 00000000000..466cabedd2e --- /dev/null +++ b/tests/queries/0_stateless/03205_column_type_check.reference @@ -0,0 +1,5 @@ +1 nan 1048575 2 +1 1 1 1 1 +() 1 nan 1048575 2 + +() 1 nan 1048575 2 diff --git a/tests/queries/0_stateless/03205_column_type_check.sql b/tests/queries/0_stateless/03205_column_type_check.sql new file mode 100644 index 00000000000..332061d6be8 --- /dev/null +++ b/tests/queries/0_stateless/03205_column_type_check.sql @@ -0,0 +1,9 @@ +SELECT * FROM (SELECT toUInt256(1)) AS t, (SELECT greatCircleAngle(toLowCardinality(toNullable(toUInt256(1048575))), 257, -9223372036854775808, 1048576), 1048575, materialize(2)) AS u; + + +SET join_algorithm='hash'; +SET allow_experimental_join_condition=1; +SELECT * FROM ( SELECT 1 AS a, toLowCardinality(1), 1) AS t1 CROSS JOIN (SELECT toLowCardinality(1 AS a), 1 AS b) AS t2; + + +SELECT * FROM (SELECT tuple(), 1 GROUP BY greatCircleAngle(toNullable(1048575), 257, toInt128(-9223372036854775808), materialize(1048576)) WITH TOTALS) AS t, (SELECT greatCircleAngle(toUInt256(1048575), 257, toNullable(-9223372036854775808), 1048576), 1048575, 2) AS u diff --git a/tests/queries/0_stateless/03205_hashing_empty_tuples.reference b/tests/queries/0_stateless/03205_hashing_empty_tuples.reference new file mode 100644 index 00000000000..e24b5809aee --- /dev/null +++ b/tests/queries/0_stateless/03205_hashing_empty_tuples.reference @@ -0,0 +1,22 @@ +16324913028386710556 +16324913028386710556 +5049034479224883533 +7385293435322750976 +12248912094175844631 +5049034479224883533 +5887129541803688833 +5887129541803688833 +13747979201178469747 +5887129541803688833 +15520217392480966957 +16324913028386710556 +16324913028386710556 +5049034479224883533 +7385293435322750976 +12248912094175844631 +5049034479224883533 +5887129541803688833 +5887129541803688833 
+13747979201178469747 +5887129541803688833 +15520217392480966957 diff --git a/tests/queries/0_stateless/03205_hashing_empty_tuples.sql b/tests/queries/0_stateless/03205_hashing_empty_tuples.sql new file mode 100644 index 00000000000..4a97f30ced3 --- /dev/null +++ b/tests/queries/0_stateless/03205_hashing_empty_tuples.sql @@ -0,0 +1,23 @@ +SELECT sipHash64(()); +SELECT sipHash64((), ()); +SELECT sipHash64((), 1); +SELECT sipHash64(1, ()); +SELECT sipHash64(1, (), 1); +SELECT sipHash64((), 1, ()); +SELECT sipHash64((), (1, 2)); +SELECT sipHash64((), (1, 2)); +SELECT sipHash64((1, 2), ()); +SELECT sipHash64((), (1, 2), ()); +SELECT sipHash64((1, 2), (), (3, 4)); + +SELECT sipHash64(materialize(())); +SELECT sipHash64(materialize(()), materialize(())); +SELECT sipHash64(materialize(()), 1); +SELECT sipHash64(1, materialize(())); +SELECT sipHash64(1, materialize(()), 1); +SELECT sipHash64((), 1, materialize(())); +SELECT sipHash64(materialize(()), (1, 2)); +SELECT sipHash64(materialize(()), (1, 2)); +SELECT sipHash64((1, 2), materialize(())); +SELECT sipHash64(materialize(()), (1, 2), ()); +SELECT sipHash64((1, 2), materialize(()), (3, 4)); diff --git a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.reference b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.reference new file mode 100644 index 00000000000..13e229432ae --- /dev/null +++ b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.reference @@ -0,0 +1,10 @@ +false 1 1 +true 1 1 +--- +false 1 1 +false 1 2 +false 1 3 +true 1 1 +true 1 2 +--- +-755809149 0 diff --git a/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql new file mode 100644 index 00000000000..4cc54bb5ac2 --- /dev/null +++ b/tests/queries/0_stateless/03205_parallel_window_finctions_and_column_sparse_bug.sql @@ -0,0 +1,33 @@ +create table t(c Int32, d Bool) Engine=MergeTree order by c; +system stop merges t; + +insert into t values (1, 0); +insert into t values (1, 0); +insert into t values (1, 1); +insert into t values (1, 0)(1, 1); + +SELECT d, c, row_number() over (partition by d order by c) as c8 FROM t qualify c8=1 order by d settings max_threads=2, allow_experimental_analyzer = 1; +SELECT '---'; +SELECT d, c, row_number() over (partition by d order by c) as c8 FROM t order by d, c8 settings max_threads=2; +SELECT '---'; + +drop table t; + +create table t ( + c Int32 primary key , + s Bool , + w Float64 + ); + +system stop merges t; + +insert into t values(439499072,true,0),(1393290072,true,0); +insert into t values(-1317193174,false,0),(1929066636,false,0); +insert into t values(-2,false,0),(1962246186,true,0),(2054878592,false,0); +insert into t values(-1893563136,true,41.55); +insert into t values(-1338380855,true,-0.7),(-991301833,true,0),(-755809149,false,43.18),(-41,true,0),(3,false,0),(255,false,0),(255,false,0),(189195893,false,0),(195550885,false,9223372036854776000); + +SELECT * FROM ( +SELECT c, min(w) OVER (PARTITION BY s ORDER BY c ASC, s ASC, w ASC) +FROM t limit toUInt64(-1)) +WHERE c = -755809149; diff --git a/tests/queries/0_stateless/03206_is_null_constant_result_old_analyzer_bug.reference b/tests/queries/0_stateless/03206_is_null_constant_result_old_analyzer_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03206_is_null_constant_result_old_analyzer_bug.sql 
b/tests/queries/0_stateless/03206_is_null_constant_result_old_analyzer_bug.sql new file mode 100644 index 00000000000..39e9ebfcd77 --- /dev/null +++ b/tests/queries/0_stateless/03206_is_null_constant_result_old_analyzer_bug.sql @@ -0,0 +1,23 @@ +CREATE TABLE left (x UUID) ORDER BY tuple(); + +CREATE TABLE right (x UUID) ORDER BY tuple(); + +set allow_experimental_analyzer=0; + +SELECT left.x, (right.x IS NULL)::Boolean FROM left LEFT OUTER JOIN right ON left.x = right.x GROUP BY ALL; + +SELECT isNullable(number)::Boolean, now() FROM numbers(2) GROUP BY isNullable(number)::Boolean, now() FORMAT Null; + +SELECT isNull(number)::Boolean, now() FROM numbers(2) GROUP BY isNull(number)::Boolean, now() FORMAT Null; + +SELECT (number IS NULL)::Boolean, now() FROM numbers(2) GROUP BY (number IS NULL)::Boolean, now() FORMAT Null; + +set allow_experimental_analyzer=1; + +SELECT left.x, (right.x IS NULL)::Boolean FROM left LEFT OUTER JOIN right ON left.x = right.x GROUP BY ALL; + +SELECT isNullable(number)::Boolean, now() FROM numbers(2) GROUP BY isNullable(number)::Boolean, now() FORMAT Null; + +SELECT isNull(number)::Boolean, now() FROM numbers(2) GROUP BY isNull(number)::Boolean, now() FORMAT Null; + +SELECT (number IS NULL)::Boolean, now() FROM numbers(2) GROUP BY (number IS NULL)::Boolean, now() FORMAT Null; diff --git a/tests/queries/0_stateless/03207_composite_expressions_lambda_consistent_formatting.reference b/tests/queries/0_stateless/03207_composite_expressions_lambda_consistent_formatting.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03207_composite_expressions_lambda_consistent_formatting.sql b/tests/queries/0_stateless/03207_composite_expressions_lambda_consistent_formatting.sql new file mode 100644 index 00000000000..2e2f5510876 --- /dev/null +++ b/tests/queries/0_stateless/03207_composite_expressions_lambda_consistent_formatting.sql @@ -0,0 +1,7 @@ +SET allow_experimental_analyzer = 1; +SELECT [1, (x -> 1)]; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT (1, (x -> 1)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT map(1, (x -> 1)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT [1, lambda(x, 1)]; -- { serverError UNKNOWN_IDENTIFIER } +SELECT (1, lambda(x, 1)); -- { serverError UNKNOWN_IDENTIFIER } +SELECT map(1, lambda(x, 1)); -- { serverError UNKNOWN_IDENTIFIER } diff --git a/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.reference b/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.reference new file mode 100644 index 00000000000..a1afeb1ab82 --- /dev/null +++ b/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.reference @@ -0,0 +1 @@ +SELECT NOT ((SELECT 1)) diff --git a/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.sh b/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.sh new file mode 100755 index 00000000000..594d316b621 --- /dev/null +++ b/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_FORMAT --oneline --query "SELECT NOT((SELECT 1))" diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 2e21f351d2a..1a86cd9f8db 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -79,7 +79,9 @@ function check_replication_consistency() fi # Touch all data to check that it's readable (and trigger PartCheckThread if needed) - while ! $CLICKHOUSE_CLIENT -q "SELECT * FROM merge(currentDatabase(), '$table_name_prefix') FORMAT Null" 2>/dev/null; do + # it's important to disable prefer warmed unmerged parts because + # otherwise it can read non-syncrhonized state of replicas + while ! $CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 -q "SELECT * FROM merge(currentDatabase(), '$table_name_prefix') FORMAT Null" 2>/dev/null; do sleep 1; num_tries=$((num_tries+1)) if [ $num_tries -eq 250 ]; then @@ -102,7 +104,9 @@ function check_replication_consistency() try_sync_replicas "$table_name_prefix" "$time_left" || exit 1 - res=$($CLICKHOUSE_CLIENT -q \ + # it's important to disable prefer warmed unmerged parts because + # otherwise it can read non-syncrhonized state of replicas + res=$($CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 -q \ "SELECT if((countDistinct(data) as c) == 0, 1, c) FROM diff --git a/tests/queries/1_stateful/00162_mmap_compression_none.sql b/tests/queries/1_stateful/00162_mmap_compression_none.sql index d2cbcea8aaa..48d6ada821e 100644 --- a/tests/queries/1_stateful/00162_mmap_compression_none.sql +++ b/tests/queries/1_stateful/00162_mmap_compression_none.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS hits_none; CREATE TABLE hits_none (Title String CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -INSERT INTO hits_none SELECT Title FROM test.hits; +INSERT INTO hits_none SELECT Title FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16; SET min_bytes_to_use_mmap_io = 1; SELECT sum(length(Title)) FROM hits_none; diff --git a/tests/result b/tests/result new file mode 100644 index 00000000000..b76f44f1e6a --- /dev/null +++ b/tests/result @@ -0,0 +1,12 @@ +Using queries from 'queries' directory +Connecting to ClickHouse server...... OK +Connected to server 24.7.1.1 @ 246f421f2402799fd11b22a608b4d0d497cb8438 chesema-processor-onCancel + +Running 1 stateless tests (MainProcess). + +00993_system_parts_race_condition_drop_zookeeper: [ OK ] + +1 tests passed. 0 tests skipped. 124.59 s elapsed (MainProcess). + +0 tests passed. 0 tests skipped. 0.00 s elapsed (MainProcess). +All tests have finished. 
diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index af3fe2b4f95..8e4e4fafe29 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -745,6 +745,7 @@ PromQL Promql Promtail Protobuf +ProtobufList ProtobufSingle ProxySQL Punycode @@ -761,6 +762,7 @@ QueryCacheMisses QueryPreempted QueryThread QuickAssist +QuickSight QuoteMeta RBAC RClickHouse @@ -1652,6 +1654,7 @@ formated formatschema formatter formatters +fqdn frac freezed fromDaysSinceYearZero @@ -2200,6 +2203,8 @@ parseReadableSizeOrZero parseTimeDelta parseable parsers +partitionId +partitionID pathFull pclmulqdq pcre @@ -2282,6 +2287,7 @@ prometheus proportionsZTest proto protobuf +protobuflist protobufsingle protocol proxied @@ -2535,6 +2541,7 @@ sqlite sqrt src srcReplicas +sshkey stackoverflow stacktrace stacktraces diff --git a/utils/check-style/check-pylint b/utils/check-style/check-pylint index 8cfbc68ac96..919a1661e45 100755 --- a/utils/check-style/check-pylint +++ b/utils/check-style/check-pylint @@ -12,5 +12,4 @@ function xargs-pylint { # exclude ci unittest scripts from check: test_* find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | grep -v "/test_" | xargs-pylint 50 -# Beware, there lambdas are checked. All of them contain `app`, and it causes brain-cucumber-zalgo -find "$ROOT_PATH/tests/ci" -mindepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 1 + diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 587e015b340..59761d827e1 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -785,7 +786,7 @@ struct SetupNodeCollector if (snapshot_result.storage == nullptr) { std::cerr << "No initial snapshot found" << std::endl; - initial_storage = std::make_unique( + initial_storage = std::make_unique( /* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); initial_storage->initializeSystemNodes(); } @@ -931,7 +932,7 @@ struct SetupNodeCollector std::cerr << "Generating snapshot with starting data" << std::endl; DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(initial_storage->getZXID(), 1, std::make_shared()); - DB::KeeperStorageSnapshot snapshot(initial_storage.get(), snapshot_meta); + DB::KeeperStorageSnapshot snapshot(initial_storage.get(), snapshot_meta); snapshot_manager->serializeSnapshotToDisk(snapshot); new_nodes = false; @@ -939,9 +940,9 @@ struct SetupNodeCollector std::mutex nodes_mutex; DB::KeeperContextPtr keeper_context; - Coordination::KeeperStoragePtr initial_storage; + std::shared_ptr initial_storage; std::unordered_set nodes_created_during_replay; - std::optional snapshot_manager; + std::optional> snapshot_manager; bool new_nodes = false; }; diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 25be8549dd6..60170d67a2e 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -15,7 +16,7 @@ using namespace Coordination; using namespace DB; -void dumpMachine(std::shared_ptr machine) +void dumpMachine(std::shared_ptr> machine) { auto & storage = machine->getStorageUnsafe(); std::queue keys; @@ -69,7 +70,7 @@ int main(int argc, char *argv[]) keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2])); 
keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", argv[1])); - auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); + auto state_machine = std::make_shared>(queue, snapshots_queue, keeper_context, nullptr); state_machine->init(); size_t last_commited_index = state_machine->last_commit_index(); diff --git a/docker/test/libfuzzer/run_libfuzzer.py b/utils/libfuzzer/runner.py old mode 100755 new mode 100644 similarity index 86% rename from docker/test/libfuzzer/run_libfuzzer.py rename to utils/libfuzzer/runner.py index fa67805dfa5..bbe648dbbc2 --- a/docker/test/libfuzzer/run_libfuzzer.py +++ b/utils/libfuzzer/runner.py @@ -20,6 +20,7 @@ def run_fuzzer(fuzzer: str): options_file = f"{fuzzer}.options" custom_libfuzzer_options = "" + fuzzer_arguments = "" with Path(options_file) as path: if path.exists() and path.is_file(): @@ -47,9 +48,17 @@ def run_fuzzer(fuzzer: str): for key, value in parser["libfuzzer"].items() ) + if parser.has_section("fuzzer_arguments"): + fuzzer_arguments = " ".join( + ("%s" % key) if value == "" else ("%s=%s" % (key, value)) + for key, value in parser["fuzzer_arguments"].items() + ) + cmd_line = f"{DEBUGGER} ./{fuzzer} {FUZZER_ARGS} {corpus_dir}" if custom_libfuzzer_options: cmd_line += f" {custom_libfuzzer_options}" + if fuzzer_arguments: + cmd_line += f" {fuzzer_arguments}" if not "-dict=" in cmd_line and Path(f"{fuzzer}.dict").exists(): cmd_line += f" -dict={fuzzer}.dict" @@ -70,8 +79,6 @@ def main(): if (current / fuzzer).is_file() and os.access(current / fuzzer, os.X_OK): run_fuzzer(fuzzer) - exit(0) - if __name__ == "__main__": main()
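
For reference, a minimal sketch (not part of the diff) of how the new "fuzzer_arguments" section of a <fuzzer>.options file would be turned into extra command-line arguments; the option names below are invented for illustration, and only the join expression is taken from runner.py above:

# Hypothetical sketch of the new fuzzer_arguments handling in runner.py.
# The option names ("--log-file", "--some-flag") are invented for illustration.
import configparser

parser = configparser.ConfigParser()
parser.read_string(
    "[fuzzer_arguments]\n"
    "--log-file = fuzz.log\n"  # a key with a value is emitted as key=value
    "--some-flag =\n"          # a key with an empty value is emitted as the bare key
)

# Same expression as in runner.py: join all options into one argument string.
fuzzer_arguments = " ".join(
    ("%s" % key) if value == "" else ("%s=%s" % (key, value))
    for key, value in parser["fuzzer_arguments"].items()
)

print(fuzzer_arguments)  # --log-file=fuzz.log --some-flag

Under these assumptions, the resulting string is appended to the fuzzer command line after the custom libfuzzer options, so per-fuzzer arguments can be configured without changing the runner itself.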