diff --git a/.clang-tidy b/.clang-tidy index 896052915f7..de19059d09e 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -37,7 +37,6 @@ Checks: [ '-cert-oop54-cpp', '-cert-oop57-cpp', - '-clang-analyzer-optin.core.EnumCastOutOfRange', # https://github.com/abseil/abseil-cpp/issues/1667 '-clang-analyzer-optin.performance.Padding', '-clang-analyzer-unix.Malloc', diff --git a/.editorconfig b/.editorconfig index 8ac06debb5a..8ecaf9b0267 100644 --- a/.editorconfig +++ b/.editorconfig @@ -19,3 +19,7 @@ charset = utf-8 indent_style = space indent_size = 4 trim_trailing_whitespace = true + +# Some SQL results have trailing whitespace which is removed by IDEs +[tests/queries/**.reference] +trim_trailing_whitespace = false diff --git a/.github/ISSUE_TEMPLATE/10_question.md b/.github/ISSUE_TEMPLATE/10_question.md deleted file mode 100644 index 08a05a844e0..00000000000 --- a/.github/ISSUE_TEMPLATE/10_question.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: Question -about: Ask a question about ClickHouse -title: '' -labels: question -assignees: '' - ---- - -> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in [community Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse - -> If you still prefer GitHub issues, remove all this text and ask your question here. 
- -**Company or project name** - -Put your company name or project description here - -**Question** - -Your question diff --git a/.github/ISSUE_TEMPLATE/10_question.yaml b/.github/ISSUE_TEMPLATE/10_question.yaml new file mode 100644 index 00000000000..39d4c27807a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/10_question.yaml @@ -0,0 +1,20 @@ +name: Question +description: Ask a question about ClickHouse +labels: ["question"] +body: + - type: markdown + attributes: + value: | + > Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in [community Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse + - type: textarea + attributes: + label: Company or project name + description: Put your company name or project description here. + validations: + required: false + - type: textarea + attributes: + label: Question + description: Please put your question here. 
+ validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 51a1a6e2df8..e045170561d 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -6,6 +6,7 @@ tests/ci/cancel_and_rerun_workflow_lambda/app.py --> ### Changelog category (leave one): - New Feature +- Experimental Feature - Improvement - Performance Improvement - Backward Incompatible Change @@ -48,21 +49,18 @@ At a minimum, the following information should be added (but add more as needed) - [ ] Allow: Stateful tests - [ ] Allow: Integration Tests - [ ] Allow: Performance tests -- [ ] Allow: All NOT Required Checks +- [ ] Allow: All Builds - [ ] Allow: batch 1, 2 for multi-batch jobs - [ ] Allow: batch 3, 4, 5, 6 for multi-batch jobs --- - [ ] Exclude: Style check - [ ] Exclude: Fast test -- [ ] Exclude: Integration Tests -- [ ] Exclude: Stateless tests -- [ ] Exclude: Stateful tests -- [ ] Exclude: Performance tests - [ ] Exclude: All with ASAN -- [ ] Exclude: All with Aarch64 - [ ] Exclude: All with TSAN, MSAN, UBSAN, Coverage +- [ ] Exclude: All with aarch64, release, debug --- - [ ] Do not test +- [ ] Woolen Wolfdog - [ ] Upload binaries for special builds - [ ] Disable merge-commit - [ ] Disable CI cache diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index b0380b939bb..64c3d2f8342 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -70,7 +70,7 @@ jobs: if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Compatibility check (amd64) + test_name: Compatibility check (release) runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} CompatibilityCheckAarch64: @@ -159,33 +159,24 @@ jobs: ############################################################################################ ##################################### BUILD REPORTER 
####################################### ############################################################################################ - BuilderReport: + Builds_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderDebAarch64 - - BuilderDebAsan - - BuilderDebDebug - - BuilderDebRelease - - BuilderDebTsan - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - BuilderSpecialReport: - # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderBinDarwin - - BuilderBinDarwinAarch64 - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse special build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} + needs: [RunConfig, BuilderDebAarch64, BuilderDebAsan, BuilderDebDebug, BuilderDebRelease, BuilderDebTsan, BuilderBinDarwin, BuilderBinDarwinAarch64] + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Download reports + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --pre --job-name Builds + - name: Builds report + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 ./build_report_check.py --reports package_release package_aarch64 package_asan package_tsan package_debug binary_darwin binary_darwin_aarch64 + - name: Set status + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --post --job-name Builds ############################################################################################ #################################### 
INSTALL PACKAGES ###################################### ############################################################################################ @@ -194,7 +185,7 @@ jobs: if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Install packages (amd64) + test_name: Install packages (release) runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} run_command: | @@ -204,7 +195,7 @@ jobs: if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Install packages (arm64) + test_name: Install packages (aarch64) runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} run_command: | @@ -256,8 +247,7 @@ jobs: FinishCheck: if: ${{ !failure() && !cancelled() }} needs: - - BuilderReport - - BuilderSpecialReport + - Builds_Report - FunctionalStatelessTestAsan - FunctionalStatefulTestDebug - StressTestTsan @@ -273,5 +263,8 @@ jobs: - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 finish_check.py + # update mergeable check + python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + # update overall ci report + python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} python3 merge_pr.py diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml new file mode 100644 index 00000000000..3988df3b2b1 --- /dev/null +++ b/.github/workflows/create_release.yml @@ -0,0 +1,29 @@ +name: CreateRelease + +concurrency: + group: release + +'on': + workflow_dispatch: + inputs: + sha: + description: 'The SHA hash of the commit from which to create the release' + required: true + type: string + type: + description: 'The type of release: "new" for a new release or "patch" for a patch release' + required: true + type: choice + options: + - new + - patch + +jobs: + Release: + runs-on: [self-hosted, style-checker-aarch64] 
+ steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Print greeting + run: | + python3 ./tests/ci/release.py --commit ${{ inputs.sha }} --type ${{ inputs.type }} --dry-run diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index c2a893a8e99..2a7e6f737ab 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -104,8 +104,8 @@ jobs: with: stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} - # stage for jobs that do not prohibit merge Tests_3: + # Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there. needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} uses: ./.github/workflows/reusable_test_stage.yml @@ -114,23 +114,14 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} ################################# Reports ################################# - # Reports should be run even if Builds_1/2 failed - put them separately in wf (not in Tests_1/2) - Builds_1_Report: + # Reports should run even if Builds_1/2 fail - run them separately, not in Tests_1/2/3 + Builds_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} - needs: [RunConfig, Builds_1] + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} + needs: [RunConfig, Builds_1, Builds_2] uses: ./.github/workflows/reusable_test.yml with: - test_name: ClickHouse build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - Builds_2_Report: - # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && needs.RunConfig.result == 'success' && 
contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }} - needs: [RunConfig, Builds_2] - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse special build check + test_name: Builds runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} @@ -164,7 +155,7 @@ jobs: FinishCheck: if: ${{ !cancelled() }} - needs: [RunConfig, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] + needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code @@ -172,4 +163,4 @@ jobs: - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 finish_check.py + python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index c8b2452829b..01685ee1f5a 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -96,20 +96,15 @@ jobs: stage: Tests_1 data: ${{ needs.RunConfig.outputs.data }} - ################################# Stage Final ################################# - # - FinishCheck: - if: ${{ !failure() && !cancelled() }} + CheckReadyForMerge: + if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} + # Test_2 or Test_3 must not have jobs required for Mergeable check needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 - - name: Check sync status + - name: Check and set merge status run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 sync_pr.py --status - - name: Finish label - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 finish_check.py ${{ (contains(needs.*.result, 'failure') && github.event_name == 'merge_group') && '--pipeline-failure' || '' }} + 
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 7d22554473e..4764e6d3c1a 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -126,16 +126,16 @@ jobs: with: stage: Builds_2 data: ${{ needs.RunConfig.outputs.data }} + # stage for running non-required checks without being blocked by required checks (Test_1) if corresponding settings is selected Tests_2: - needs: [RunConfig, Builds_2] + needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} uses: ./.github/workflows/reusable_test_stage.yml with: stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} - # stage for jobs that do not prohibit merge Tests_3: - needs: [RunConfig, Tests_1, Tests_2] + needs: [RunConfig, Builds_1, Tests_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} uses: ./.github/workflows/reusable_test_stage.yml with: @@ -143,29 +143,21 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} ################################# Reports ################################# - # Reports should by run even if Builds_1/2 fail, so put them separately in wf (not in Tests_1/2) - Builds_1_Report: + # Reports should run even if Builds_1/2 fail - run them separately (not in Tests_1/2/3) + Builds_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} - needs: [RunConfig, StyleCheck, Builds_1] + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} + needs: [RunConfig, StyleCheck, 
Builds_1, Builds_2] uses: ./.github/workflows/reusable_test.yml with: - test_name: ClickHouse build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - Builds_2_Report: - # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && needs.StyleCheck.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }} - needs: [RunConfig, StyleCheck, Builds_2] - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse special build check + test_name: Builds runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} CheckReadyForMerge: if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} - needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2] + # Test_2 or Test_3 must not have jobs required for Mergeable check + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code @@ -181,7 +173,7 @@ jobs: # FinishCheck: if: ${{ !cancelled() }} - needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code @@ -191,7 +183,7 @@ jobs: - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 finish_check.py + python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} ############################################################################################# ###################################### JEPSEN TESTS ######################################### @@ -204,8 +196,7 @@ jobs: 
concurrency: group: jepsen if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse Keeper Jepsen') }} - # jepsen needs binary_release build which is in Builds_2 - needs: [RunConfig, Builds_2] + needs: [RunConfig, Builds_1] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse Keeper Jepsen diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 4d45c8d8d4b..6bf846d7535 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -65,7 +65,7 @@ jobs: if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Compatibility check (amd64) + test_name: Compatibility check (release) runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} CompatibilityCheckAarch64: @@ -176,35 +176,24 @@ jobs: ############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ - BuilderReport: + Builds_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderDebRelease - - BuilderDebAarch64 - - BuilderDebAsan - - BuilderDebTsan - - BuilderDebUBsan - - BuilderDebMsan - - BuilderDebDebug - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - BuilderSpecialReport: - # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderBinDarwin - - BuilderBinDarwinAarch64 - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse special build check - runner_type: style-checker-aarch64 - data: ${{ 
needs.RunConfig.outputs.data }} + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} + needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64, BuilderDebAsan, BuilderDebUBsan, BuilderDebMsan, BuilderDebTsan, BuilderDebDebug, BuilderBinDarwin, BuilderBinDarwinAarch64] + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Download reports + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --pre --job-name Builds + - name: Builds report + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 ./build_report_check.py --reports package_release package_aarch64 package_asan package_msan package_ubsan package_tsan package_debug binary_darwin binary_darwin_aarch64 + - name: Set status + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --post --job-name Builds MarkReleaseReady: if: ${{ !failure() && !cancelled() }} needs: @@ -244,7 +233,7 @@ jobs: if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Install packages (amd64) + test_name: Install packages (release) runner_type: style-checker data: ${{ needs.RunConfig.outputs.data }} run_command: | @@ -254,7 +243,7 @@ jobs: if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml with: - test_name: Install packages (arm64) + test_name: Install packages (aarch64) runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} run_command: | @@ -460,8 +449,7 @@ jobs: needs: - DockerServerImage - DockerKeeperImage - - BuilderReport - - BuilderSpecialReport + - Builds_Report - MarkReleaseReady - FunctionalStatelessTestDebug - FunctionalStatelessTestRelease @@ -496,4 +484,7 @@ jobs: - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 finish_check.py + 
# update mergeable check + python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + # update overall ci report + python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml index e4fc9f0b1d3..2aa7694bc41 100644 --- a/.github/workflows/tags_stable.yml +++ b/.github/workflows/tags_stable.yml @@ -46,9 +46,10 @@ jobs: ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv ./utils/list-versions/update-docker-version.sh GID=$(id -g "${UID}") - docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 \ + # --network=host and CI=1 are required for the S3 access from a container + docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \ --volume="${GITHUB_WORKSPACE}:/ClickHouse" clickhouse/style-test \ - /ClickHouse/utils/changelog/changelog.py -v --debug-helpers \ + /ClickHouse/tests/ci/changelog.py -v --debug-helpers \ --gh-user-or-token="$GITHUB_TOKEN" --jobs=5 \ --output="/ClickHouse/docs/changelogs/${GITHUB_TAG}.md" "${GITHUB_TAG}" git add "./docs/changelogs/${GITHUB_TAG}.md" diff --git a/.gitmodules b/.gitmodules index 28696428e8c..12d865307d8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -91,13 +91,13 @@ [submodule "contrib/aws"] path = contrib/aws url = https://github.com/ClickHouse/aws-sdk-cpp -[submodule "aws-c-event-stream"] +[submodule "contrib/aws-c-event-stream"] path = contrib/aws-c-event-stream url = https://github.com/awslabs/aws-c-event-stream -[submodule "aws-c-common"] +[submodule "contrib/aws-c-common"] path = contrib/aws-c-common url = https://github.com/awslabs/aws-c-common.git -[submodule "aws-checksums"] +[submodule "contrib/aws-checksums"] path = contrib/aws-checksums url = https://github.com/awslabs/aws-checksums [submodule "contrib/curl"] @@ -163,7 +163,7 @@ url = https://github.com/xz-mirror/xz [submodule 
"contrib/abseil-cpp"] path = contrib/abseil-cpp - url = https://github.com/abseil/abseil-cpp + url = https://github.com/ClickHouse/abseil-cpp.git [submodule "contrib/dragonbox"] path = contrib/dragonbox url = https://github.com/ClickHouse/dragonbox diff --git a/.yamllint b/.yamllint index 9d6550ac960..f144e2d47b1 100644 --- a/.yamllint +++ b/.yamllint @@ -13,5 +13,4 @@ rules: level: warning comments: min-spaces-from-content: 1 - document-start: - present: false + document-start: disable diff --git a/CHANGELOG.md b/CHANGELOG.md index 4891b79e4c7..e2eb65e2967 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v24.6, 2024-07-01](#246)**
**[ClickHouse release v24.5, 2024-05-30](#245)**
**[ClickHouse release v24.4, 2024-04-30](#244)**
**[ClickHouse release v24.3 LTS, 2024-03-26](#243)**
@@ -8,6 +9,169 @@ # 2024 Changelog +### ClickHouse release 24.6, 2024-07-01 + +#### Backward Incompatible Change +* Enable asynchronous load of databases and tables by default. See the `async_load_databases` in config.xml. While this change is fully compatible, it can introduce a difference in behavior. When `async_load_databases` is false, as in the previous versions, the server will not accept connections until all tables are loaded. When `async_load_databases` is true, as in the new version, the server can accept connections before all the tables are loaded. If a query is made to a table that is not yet loaded, it will wait for the table's loading, which can take considerable time. It can change the behavior of the server if it is part of a large distributed system under a load balancer. In the first case, the load balancer can get a connection refusal and quickly failover to another server. In the second case, the load balancer can connect to a server that is still loading the tables, and the query will have a higher latency. Moreover, if many queries accumulate in the waiting state, it can lead to a "thundering herd" problem when they start processing simultaneously. This can make a difference only for highly loaded distributed backends. You can set the value of `async_load_databases` to false to avoid this problem. [#57695](https://github.com/ClickHouse/ClickHouse/pull/57695) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Setting `replace_long_file_name_to_hash` is enabled by default for `MergeTree` tables. [#64457](https://github.com/ClickHouse/ClickHouse/pull/64457) ([Anton Popov](https://github.com/CurtizJ)). This setting is fully compatible, and no actions needed during upgrade. The new data format is supported from all versions starting from 23.9. After enabling this setting, you can no longer downgrade to a version 23.8 or older. +* Some invalid queries will fail earlier during parsing. 
Note: disabled the support for inline KQL expressions (the experimental Kusto language) when they are put into a `kql` table function without a string literal, e.g. `kql(garbage | trash)` instead of `kql('garbage | trash')` or `kql($$garbage | trash$$)`. This feature was introduced unintentionally and should not exist. [#61500](https://github.com/ClickHouse/ClickHouse/pull/61500) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rework parallel processing in `Ordered` mode of storage `S3Queue`. This PR is backward incompatible for Ordered mode if you used settings `s3queue_processing_threads_num` or `s3queue_total_shards_num`. Setting `s3queue_total_shards_num` is deleted, previously it was allowed to use only under `s3queue_allow_experimental_sharded_mode`, which is now deprecated. A new setting is added - `s3queue_buckets`. [#64349](https://github.com/ClickHouse/ClickHouse/pull/64349) ([Kseniia Sumarokova](https://github.com/kssenii)). +* New functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` were added. Unlike the existing functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake`, the new functions are compatible with function `generateSnowflakeID`, i.e. they accept the snowflake IDs generated by `generateSnowflakeID` and produce snowflake IDs of the same type as `generateSnowflakeID` (i.e. `UInt64`). Furthermore, the new functions default to the UNIX epoch (aka. 1970-01-01), just like `generateSnowflakeID`. If necessary, a different epoch, e.g. Twitter's/X's epoch 2010-11-04 aka. 1288834974657 msec since UNIX epoch, can be passed. The old conversion functions are deprecated and will be removed after a transition period: to use them regardless, enable setting `allow_deprecated_snowflake_conversion_functions`. [#64948](https://github.com/ClickHouse/ClickHouse/pull/64948) ([Robert Schulze](https://github.com/rschu1ze)). 
+ +#### New Feature +* Allow to store named collections in ClickHouse Keeper. [#64574](https://github.com/ClickHouse/ClickHouse/pull/64574) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support empty tuples. [#55061](https://github.com/ClickHouse/ClickHouse/pull/55061) ([Amos Bird](https://github.com/amosbird)). +* Add Hilbert Curve encode and decode functions. [#60156](https://github.com/ClickHouse/ClickHouse/pull/60156) ([Artem Mustafin](https://github.com/Artemmm91)). +* Add support for index analysis over `hilbertEncode`. [#64662](https://github.com/ClickHouse/ClickHouse/pull/64662) ([Artem Mustafin](https://github.com/Artemmm91)). +* Added support for reading `LINESTRING` geometry in the WKT format using function `readWKTLineString`. [#62519](https://github.com/ClickHouse/ClickHouse/pull/62519) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow to attach parts from a different disk. [#63087](https://github.com/ClickHouse/ClickHouse/pull/63087) ([Unalian](https://github.com/Unalian)). +* Added a new SQL function `generateSnowflakeID` for generating Twitter-style Snowflake IDs. [#63577](https://github.com/ClickHouse/ClickHouse/pull/63577) ([Danila Puzov](https://github.com/kazalika)). +* Added `merge_workload` and `mutation_workload` settings to regulate how resources are utilized and shared between merges, mutations and other workloads. [#64061](https://github.com/ClickHouse/ClickHouse/pull/64061) ([Sergei Trifonov](https://github.com/serxa)). +* Add support for comparing `IPv4` and `IPv6` types using the `=` operator. [#64292](https://github.com/ClickHouse/ClickHouse/pull/64292) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Support decimal arguments in binary math functions (pow, atan2, max2, min2, hypot). [#64582](https://github.com/ClickHouse/ClickHouse/pull/64582) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Added SQL functions `parseReadableSize` (along with `OrNull` and `OrZero` variants). 
[#64742](https://github.com/ClickHouse/ClickHouse/pull/64742) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Add server settings `max_table_num_to_throw` and `max_database_num_to_throw` to limit the number of databases or tables on `CREATE` queries. [#64781](https://github.com/ClickHouse/ClickHouse/pull/64781) ([Xu Jia](https://github.com/XuJia0210)). +* Add `_time` virtual column to file-like storages (s3/file/hdfs/url/azureBlobStorage). [#64947](https://github.com/ClickHouse/ClickHouse/pull/64947) ([Ilya Golshtein](https://github.com/ilejn)). +* Introduced new functions `base64URLEncode`, `base64URLDecode` and `tryBase64URLDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Add new function `editDistanceUTF8`, which calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. [#65269](https://github.com/ClickHouse/ClickHouse/pull/65269) ([LiuNeng](https://github.com/liuneng1994)). +* Add `http_response_headers` configuration to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)). +* Added a new table function `loop` to support returning query results in an infinite loop. [#63452](https://github.com/ClickHouse/ClickHouse/pull/63452) ([Sariel](https://github.com/sarielwxm)). This is useful for testing. +* Introduced two additional columns in the `system.query_log`: `used_privileges` and `missing_privileges`. `used_privileges` is populated with the privileges that were checked during query execution, and `missing_privileges` contains required privileges that are missing. [#64597](https://github.com/ClickHouse/ClickHouse/pull/64597) ([Alexey Katsman](https://github.com/alexkats)). 
+* Added a setting `output_format_pretty_display_footer_column_names` which when enabled displays column names at the end of the table for long tables (50 rows by default), with the threshold value for minimum number of rows controlled by `output_format_pretty_display_footer_column_names_min_rows`. [#65144](https://github.com/ClickHouse/ClickHouse/pull/65144) ([Shaun Struwig](https://github.com/Blargian)). + +#### Experimental Feature +* Introduce statistics of type "number of distinct values". [#59357](https://github.com/ClickHouse/ClickHouse/pull/59357) ([Han Fei](https://github.com/hanfei1991)). +* Support statistics with ReplicatedMergeTree. [#64934](https://github.com/ClickHouse/ClickHouse/pull/64934) ([Han Fei](https://github.com/hanfei1991)). +* If "replica group" is configured for a `Replicated` database, automatically create a cluster that includes replicas from all groups. [#64312](https://github.com/ClickHouse/ClickHouse/pull/64312) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add settings `parallel_replicas_custom_key_range_lower` and `parallel_replicas_custom_key_range_upper` to control how parallel replicas with dynamic shards parallelizes queries when using a range filter. [#64604](https://github.com/ClickHouse/ClickHouse/pull/64604) ([josh-hildred](https://github.com/josh-hildred)). + +#### Performance Improvement +* Add the ability to reshuffle rows during insert to optimize for size without violating the order set by `PRIMARY KEY`. It's controlled by the setting `optimize_row_order` (off by default). [#63578](https://github.com/ClickHouse/ClickHouse/pull/63578) ([Igor Markelov](https://github.com/ElderlyPassionFruit)). +* Add a native parquet reader, which can read parquet binary to ClickHouse Columns directly. It's controlled by the setting `input_format_parquet_use_native_reader` (disabled by default). [#60361](https://github.com/ClickHouse/ClickHouse/pull/60361) ([ZhiHong Zhang](https://github.com/copperybean)). 
+* Support partial trivial count optimization when the query filter is able to select exact ranges from merge tree tables. [#60463](https://github.com/ClickHouse/ClickHouse/pull/60463) ([Amos Bird](https://github.com/amosbird)). +* Reduce max memory usage of multithreaded `INSERT`s by collecting chunks of multiple threads in a single transform. [#61047](https://github.com/ClickHouse/ClickHouse/pull/61047) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Reduce the memory usage when using Azure object storage by using fixed memory allocation, avoiding the allocation of an extra buffer. [#63160](https://github.com/ClickHouse/ClickHouse/pull/63160) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Reduce the number of virtual function calls in `ColumnNullable::size`. [#60556](https://github.com/ClickHouse/ClickHouse/pull/60556) ([HappenLee](https://github.com/HappenLee)). +* Speed up `splitByRegexp` when the regular expression argument is a single character. [#62696](https://github.com/ClickHouse/ClickHouse/pull/62696) ([Robert Schulze](https://github.com/rschu1ze)). +* Speed up aggregation by 8-bit and 16-bit keys by keeping track of the min and max keys used. This allows reducing the number of cells that need to be verified. [#62746](https://github.com/ClickHouse/ClickHouse/pull/62746) ([Jiebin Sun](https://github.com/jiebinn)). +* Optimize operator IN when the left hand side is `LowCardinality` and the right is a set of constants. [#64060](https://github.com/ClickHouse/ClickHouse/pull/64060) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Use a thread pool to initialize and destroy hash tables inside `ConcurrentHashJoin`. [#64241](https://github.com/ClickHouse/ClickHouse/pull/64241) ([Nikita Taranov](https://github.com/nickitat)). +* Optimized vertical merges in tables with sparse columns. [#64311](https://github.com/ClickHouse/ClickHouse/pull/64311) ([Anton Popov](https://github.com/CurtizJ)).
+* Enabled prefetches of data from remote filesystem during vertical merges. It improves latency of vertical merges in tables with data stored on remote filesystem. [#64314](https://github.com/ClickHouse/ClickHouse/pull/64314) ([Anton Popov](https://github.com/CurtizJ)). +* Reduce redundant calls to `isDefault` of `ColumnSparse::filter` to improve performance. [#64426](https://github.com/ClickHouse/ClickHouse/pull/64426) ([Jiebin Sun](https://github.com/jiebinn)). +* Speed up `find_super_nodes` and `find_big_family` keeper-client commands by making multiple asynchronous getChildren requests. [#64628](https://github.com/ClickHouse/ClickHouse/pull/64628) ([Alexander Gololobov](https://github.com/davenger)). +* Improve function `least`/`greatest` for nullable numeric type arguments. [#64668](https://github.com/ClickHouse/ClickHouse/pull/64668) ([KevinyhZou](https://github.com/KevinyhZou)). +* Allow merging two consecutive filtering steps of a query plan. This improves filter-push-down optimization if the filter condition can be pushed down from the parent step. [#64760](https://github.com/ClickHouse/ClickHouse/pull/64760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove bad optimization in the vertical final implementation and re-enable vertical final algorithm by default. [#64783](https://github.com/ClickHouse/ClickHouse/pull/64783) ([Duc Canh Le](https://github.com/canhld94)). +* Remove ALIAS nodes from the filter expression. This slightly improves performance for queries with `PREWHERE` (with the new analyzer). [#64793](https://github.com/ClickHouse/ClickHouse/pull/64793) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Re-enable OpenSSL session caching. [#65111](https://github.com/ClickHouse/ClickHouse/pull/65111) ([Robert Schulze](https://github.com/rschu1ze)). +* Added settings to disable materialization of skip indexes and statistics on inserts (`materialize_skip_indexes_on_insert` and `materialize_statistics_on_insert`).
[#64391](https://github.com/ClickHouse/ClickHouse/pull/64391) ([Anton Popov](https://github.com/CurtizJ)). +* Use the allocated memory size to calculate the row group size and reduce the peak memory of the parquet writer in the single-threaded mode. [#64424](https://github.com/ClickHouse/ClickHouse/pull/64424) ([LiuNeng](https://github.com/liuneng1994)). +* Improve the iterator of sparse column to reduce call of `size`. [#64497](https://github.com/ClickHouse/ClickHouse/pull/64497) ([Jiebin Sun](https://github.com/jiebinn)). +* Update condition to use server-side copy for backups to Azure blob storage. [#64518](https://github.com/ClickHouse/ClickHouse/pull/64518) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Optimized memory usage of vertical merges for tables with high number of skip indexes. [#64580](https://github.com/ClickHouse/ClickHouse/pull/64580) ([Anton Popov](https://github.com/CurtizJ)). + +#### Improvement +* `SHOW CREATE TABLE` executed on top of system tables will now show the super handy comment unique for each table which will explain why this table is needed. [#63788](https://github.com/ClickHouse/ClickHouse/pull/63788) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* The second argument (scale) of functions `round()`, `roundBankers()`, `floor()`, `ceil()` and `trunc()` can now be non-const. [#64798](https://github.com/ClickHouse/ClickHouse/pull/64798) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Hot reload storage policy for `Distributed` tables when adding a new disk. [#58285](https://github.com/ClickHouse/ClickHouse/pull/58285) ([Duc Canh Le](https://github.com/canhld94)). +* Avoid possible deadlock during MergeTree index analysis when scheduling threads in a saturated service. [#59427](https://github.com/ClickHouse/ClickHouse/pull/59427) ([Sean Haynes](https://github.com/seandhaynes)). +* Several minor corner case fixes to S3 proxy support & tunneling. 
[#63427](https://github.com/ClickHouse/ClickHouse/pull/63427) ([Arthur Passos](https://github.com/arthurpassos)). +* Improve io_uring resubmit visibility. Rename profile event `IOUringSQEsResubmits` -> `IOUringSQEsResubmitsAsync` and add a new one `IOUringSQEsResubmitsSync`. [#63699](https://github.com/ClickHouse/ClickHouse/pull/63699) ([Tomer Shafir](https://github.com/tomershafir)). +* Added a new setting, `metadata_keep_free_space_bytes` to keep free space on the metadata storage disk. [#64128](https://github.com/ClickHouse/ClickHouse/pull/64128) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Add metrics to track the number of directories created and removed by the `plain_rewritable` metadata storage, and the number of entries in the local-to-remote in-memory map. [#64175](https://github.com/ClickHouse/ClickHouse/pull/64175) ([Julia Kartseva](https://github.com/jkartseva)). +* The query cache now considers identical queries with different settings as different. This increases robustness in cases where different settings (e.g. `limit` or `additional_table_filters`) would affect the query result. [#64205](https://github.com/ClickHouse/ClickHouse/pull/64205) ([Robert Schulze](https://github.com/rschu1ze)). +* Support the non standard error code `QpsLimitExceeded` in object storage as a retryable error. [#64225](https://github.com/ClickHouse/ClickHouse/pull/64225) ([Sema Checherinda](https://github.com/CheSema)). +* Forbid converting a MergeTree table to replicated if the zookeeper path for this table already exists. [#64244](https://github.com/ClickHouse/ClickHouse/pull/64244) ([Kirill](https://github.com/kirillgarbar)). +* Added a new setting `input_format_parquet_prefer_block_bytes` to control the average output block bytes, and modified the default value of `input_format_parquet_max_block_size` to 65409. [#64427](https://github.com/ClickHouse/ClickHouse/pull/64427) ([LiuNeng](https://github.com/liuneng1994)). 
+* Allow proxy to be bypassed for hosts specified in `no_proxy` env variable and ClickHouse proxy configuration. [#63314](https://github.com/ClickHouse/ClickHouse/pull/63314) ([Arthur Passos](https://github.com/arthurpassos)). +* Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)). +* Settings from the user's config don't affect merges and mutations for `MergeTree` on top of object storage. [#64456](https://github.com/ClickHouse/ClickHouse/pull/64456) ([alesapin](https://github.com/alesapin)). +* Support the non standard error code `TotalQpsLimitExceeded` in object storage as a retryable error. [#64520](https://github.com/ClickHouse/ClickHouse/pull/64520) ([Sema Checherinda](https://github.com/CheSema)). +* Updated Advanced Dashboard for both open-source and ClickHouse Cloud versions to include a chart for 'Maximum concurrent network connections'. [#64610](https://github.com/ClickHouse/ClickHouse/pull/64610) ([Thom O'Connor](https://github.com/thomoco)). +* Improve progress report on `zeros_mt` and `generateRandom`. [#64804](https://github.com/ClickHouse/ClickHouse/pull/64804) ([Raúl Marín](https://github.com/Algunenano)). +* Add an asynchronous metric `jemalloc.profile.active` to show whether sampling is currently active. This is an activation mechanism in addition to prof.active; both must be active for the calling thread to sample. [#64842](https://github.com/ClickHouse/ClickHouse/pull/64842) ([Unalian](https://github.com/Unalian)). +* Remove mark of `allow_experimental_join_condition` as important. This mark may have prevented distributed queries in a mixed versions cluster from being executed successfully. [#65008](https://github.com/ClickHouse/ClickHouse/pull/65008) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* Added server Asynchronous metrics `DiskGetObjectThrottler*` and `DiskPutObjectThrottler*` reflecting request per second rate limit defined with `s3_max_get_rps` and `s3_max_put_rps` disk settings and currently available number of requests that could be sent without hitting throttling limit on the disk. Metrics are defined for every disk that has a configured limit. [#65050](https://github.com/ClickHouse/ClickHouse/pull/65050) ([Sergei Trifonov](https://github.com/serxa)). +* Initialize global trace collector for `Poco::ThreadPool` (needed for Keeper, etc). [#65239](https://github.com/ClickHouse/ClickHouse/pull/65239) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add a validation when creating a user with `bcrypt_hash`. [#65242](https://github.com/ClickHouse/ClickHouse/pull/65242) ([Raúl Marín](https://github.com/Algunenano)). +* Add profile events for number of rows read during/after `PREWHERE`. [#64198](https://github.com/ClickHouse/ClickHouse/pull/64198) ([Nikita Taranov](https://github.com/nickitat)). +* Print query in `EXPLAIN PLAN` with parallel replicas. [#64298](https://github.com/ClickHouse/ClickHouse/pull/64298) ([vdimir](https://github.com/vdimir)). +* Rename `allow_deprecated_functions` to `allow_deprecated_error_prone_window_functions`. [#64358](https://github.com/ClickHouse/ClickHouse/pull/64358) ([Raúl Marín](https://github.com/Algunenano)). +* Respect `max_read_buffer_size` setting for file descriptors as well in the `file` table function. [#64532](https://github.com/ClickHouse/ClickHouse/pull/64532) ([Azat Khuzhin](https://github.com/azat)). +* Disable transactions for unsupported storages even for materialized views. [#64918](https://github.com/ClickHouse/ClickHouse/pull/64918) ([alesapin](https://github.com/alesapin)). +* Forbid `QUALIFY` clause in the old analyzer. The old analyzer ignored `QUALIFY`, so it could lead to unexpected data removal in mutations.
[#65356](https://github.com/ClickHouse/ClickHouse/pull/65356) ([Dmitry Novik](https://github.com/novikd)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* A bug in Apache ORC library was fixed: Fixed ORC statistics calculation, when writing, for unsigned types on all platforms and Int8 on ARM. [#64563](https://github.com/ClickHouse/ClickHouse/pull/64563) ([Michael Kolupaev](https://github.com/al13n321)). +* Returned back the behaviour of how ClickHouse works and interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix a permission error where a user in a specific situation can escalate their privileges on the default database without necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)). +* Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)). +* Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Use correct memory alignment for Distinct combinator. Previously, crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix crash with `DISTINCT` and window functions. 
[#64767](https://github.com/ClickHouse/ClickHouse/pull/64767) ([Igor Nikonov](https://github.com/devcrafter)). +* Fixed 'set' skip index not working with IN and indexHint(). [#62083](https://github.com/ClickHouse/ClickHouse/pull/62083) ([Michael Kolupaev](https://github.com/al13n321)). +* Support executing function during assignment of parameterized view value. [#63502](https://github.com/ClickHouse/ClickHouse/pull/63502) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fixed parquet memory tracking. [#63584](https://github.com/ClickHouse/ClickHouse/pull/63584) ([Michael Kolupaev](https://github.com/al13n321)). +* Fixed reading of columns of type `Tuple(Map(LowCardinality(String), String), ...)`. [#63956](https://github.com/ClickHouse/ClickHouse/pull/63956) ([Anton Popov](https://github.com/CurtizJ)). +* Fix a `Cyclic aliases` error for cyclic aliases of different type (expression and function). [#63993](https://github.com/ClickHouse/ClickHouse/pull/63993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* This fix will use a proper redefined context with the correct definer for each individual view in the query pipeline. [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)). +* Fix analyzer: "Not found column" error is fixed when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)). +* The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `duplicate alias` error for distributed queries with `ARRAY JOIN`. [#64226](https://github.com/ClickHouse/ClickHouse/pull/64226) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix unexpected accurateCast from string to integer. [#64255](https://github.com/ClickHouse/ClickHouse/pull/64255) ([wudidapaopao](https://github.com/wudidapaopao)). +* Fixed CNF simplification, in case any OR group contains mutually exclusive atoms. [#64256](https://github.com/ClickHouse/ClickHouse/pull/64256) ([Eduard Karacharov](https://github.com/korowa)). +* Fix Query Tree size validation. [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)). +* Fix `Logical error: Bad cast` for `Buffer` table with `PREWHERE`. [#64388](https://github.com/ClickHouse/ClickHouse/pull/64388) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Prevent recursive logging in `blob_storage_log` when it's stored on object storage. [#64393](https://github.com/ClickHouse/ClickHouse/pull/64393) ([vdimir](https://github.com/vdimir)). +* Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed `optimize_read_in_order` behaviour for ORDER BY ... NULLS FIRST / LAST on tables with nullable keys. [#64483](https://github.com/ClickHouse/ClickHouse/pull/64483) ([Eduard Karacharov](https://github.com/korowa)). +* Fix the `Expression nodes list expected 1 projection names` and `Unknown expression or identifier` errors for queries with aliases to `GLOBAL IN`. [#64517](https://github.com/ClickHouse/ClickHouse/pull/64517) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix the crash loop when restoring from backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)). +* Fix the output of function `formatDateTimeInJodaSyntax` when a formatter generates an uneven number of characters and the last character is `0`. For example, `SELECT formatDateTimeInJodaSyntax(toDate('2012-05-29'), 'D')` now correctly returns `150` instead of previously `15`. [#64614](https://github.com/ClickHouse/ClickHouse/pull/64614) ([LiuNeng](https://github.com/liuneng1994)). +* Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)). +* Fix type inference for float (in case of small buffer, i.e. `--max_read_buffer_size 1`). [#64641](https://github.com/ClickHouse/ClickHouse/pull/64641) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug which could lead to non-working TTLs with expressions. [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)). +* Fix removing the `WHERE` and `PREWHERE` expressions, which are always true (for the new analyzer). [#64695](https://github.com/ClickHouse/ClickHouse/pull/64695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed excessive part elimination by token-based text indexes (`ngrambf` , `full_text`) when filtering by result of `startsWith`, `endsWith`, `match`, `multiSearchAny`. [#64720](https://github.com/ClickHouse/ClickHouse/pull/64720) ([Eduard Karacharov](https://github.com/korowa)). +* Fixes incorrect behaviour of ANSI CSI escaping in the `UTF8::computeWidth` function. 
[#64756](https://github.com/ClickHouse/ClickHouse/pull/64756) ([Shaun Struwig](https://github.com/Blargian)). +* Fix a case of incorrect removal of `ORDER BY` / `LIMIT BY` across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)). +* Fix (experimental) unequal join with subqueries for sets which are in the mixed join conditions. [#64775](https://github.com/ClickHouse/ClickHouse/pull/64775) ([lgbo](https://github.com/lgbo-ustc)). +* Fix crash in a local cache over `plain_rewritable` disk. [#64778](https://github.com/ClickHouse/ClickHouse/pull/64778) ([Julia Kartseva](https://github.com/jkartseva)). +* Keeper fix: return correct value for `zk_latest_snapshot_size` in `mntr` command. [#64784](https://github.com/ClickHouse/ClickHouse/pull/64784) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `Cannot find column` in distributed query with `ARRAY JOIN` by `Nested` column. Fixes [#64755](https://github.com/ClickHouse/ClickHouse/issues/64755). [#64801](https://github.com/ClickHouse/ClickHouse/pull/64801) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix memory leak in slru cache policy. [#64803](https://github.com/ClickHouse/ClickHouse/pull/64803) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via http protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)). +* Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from the materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fix rare crash when table has TTL with subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)). +* Fix duplicating `Delete` events in `blob_storage_log` in case of large batch to delete. [#64924](https://github.com/ClickHouse/ClickHouse/pull/64924) ([vdimir](https://github.com/vdimir)). +* Fixed `Session moved to another server` error from [Zoo]Keeper that might happen after server startup when the config has includes from [Zoo]Keeper. [#64986](https://github.com/ClickHouse/ClickHouse/pull/64986) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix `host_id` in DatabaseReplicated when `cluster_secure_connection` parameter is enabled. Previously all the connections within the cluster created by DatabaseReplicated were not secure, even if the parameter was enabled. [#65054](https://github.com/ClickHouse/ClickHouse/pull/65054) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid writing to finalized buffer in File-like storages. [#65063](https://github.com/ClickHouse/ClickHouse/pull/65063) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible infinite query duration in case of cyclic aliases. Fixes [#64849](https://github.com/ClickHouse/ClickHouse/issues/64849). [#65081](https://github.com/ClickHouse/ClickHouse/pull/65081) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix pushing arithmetic operations out of aggregation. In the new analyzer, optimization was applied only once. [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)). +* Fix aggregate function name rewriting in the new analyzer. [#65110](https://github.com/ClickHouse/ClickHouse/pull/65110) ([Dmitry Novik](https://github.com/novikd)). +* Respond with 5xx instead of 200 OK in case of receive timeout while reading (parts of) the request body from the client socket. [#65118](https://github.com/ClickHouse/ClickHouse/pull/65118) ([Julian Maicher](https://github.com/jmaicher)). +* Fix possible crash for hedged requests. [#65206](https://github.com/ClickHouse/ClickHouse/pull/65206) ([Azat Khuzhin](https://github.com/azat)). +* Fix the bug in Hashed and Hashed_Array dictionary short circuit evaluation, which may read uninitialized number, leading to various errors. [#65256](https://github.com/ClickHouse/ClickHouse/pull/65256) ([jsc0218](https://github.com/jsc0218)). +* This PR ensures that the type of the constant(IN operator's second parameter) is always visible during the IN operator's type conversion process. Otherwise, losing type information may cause some conversions to fail, such as the conversion from DateTime to Date. This fixes ([#64487](https://github.com/ClickHouse/ClickHouse/issues/64487)). [#65315](https://github.com/ClickHouse/ClickHouse/pull/65315) ([pn](https://github.com/chloro-pn)). + +#### Build/Testing/Packaging Improvement +* Add support for LLVM XRay. 
[#64592](https://github.com/ClickHouse/ClickHouse/pull/64592) [#64837](https://github.com/ClickHouse/ClickHouse/pull/64837) ([Tomer Shafir](https://github.com/tomershafir)). +* Unite s3/hdfs/azure storage implementations into a single class working with IObjectStorage. Same for *Cluster, data lakes and Queue storages. [#59767](https://github.com/ClickHouse/ClickHouse/pull/59767) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Refactor data part writer to remove dependencies on MergeTreeData and DataPart. [#63620](https://github.com/ClickHouse/ClickHouse/pull/63620) ([Alexander Gololobov](https://github.com/davenger)). +* Refactor `KeyCondition` and key analysis to improve PartitionPruner and trivial count optimization. This is separated from [#60463](https://github.com/ClickHouse/ClickHouse/issues/60463) . [#61459](https://github.com/ClickHouse/ClickHouse/pull/61459) ([Amos Bird](https://github.com/amosbird)). +* Introduce assertions to verify all functions are called with columns of the right size. [#63723](https://github.com/ClickHouse/ClickHouse/pull/63723) ([Raúl Marín](https://github.com/Algunenano)). +* Make `network` service be required when using the `rc` init script to start the ClickHouse server daemon. [#60650](https://github.com/ClickHouse/ClickHouse/pull/60650) ([Chun-Sheng, Li](https://github.com/peter279k)). +* Reduce the size of some slow tests. [#64387](https://github.com/ClickHouse/ClickHouse/pull/64387) [#64452](https://github.com/ClickHouse/ClickHouse/pull/64452) ([Raúl Marín](https://github.com/Algunenano)). +* Replay ZooKeeper logs using keeper-bench. [#62481](https://github.com/ClickHouse/ClickHouse/pull/62481) ([Antonio Andelic](https://github.com/antonio2368)). 
+ ### ClickHouse release 24.5, 2024-05-30 #### Backward Incompatible Change diff --git a/CMakeLists.txt b/CMakeLists.txt index 601cbe7201c..f796e6c4616 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -122,7 +122,7 @@ add_library(global-libs INTERFACE) include (cmake/sanitize.cmake) -include (cmake/instrument.cmake) +include (cmake/xray_instrumentation.cmake) option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON) @@ -319,7 +319,6 @@ endif() # Disable floating-point expression contraction in order to get consistent floating point calculation results across platforms set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffp-contract=off") -# Our built-in unwinder only supports DWARF version up to 4. set (DEBUG_INFO_FLAGS "-g") # Disable omit frame pointer compiler optimization using -fno-omit-frame-pointer @@ -333,15 +332,15 @@ endif() set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") -set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") +set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") -set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") +set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMPILER_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") -set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") +set (CMAKE_ASM_FLAGS_DEBUG 
"${CMAKE_ASM_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") if (OS_DARWIN) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") @@ -399,7 +398,7 @@ option (ENABLE_GWP_ASAN "Enable Gwp-Asan" ON) # but GWP-ASan also wants to use mmap frequently, # and due to a large number of memory mappings, # it does not work together well. -if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")) +if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") OR SANITIZE) set(ENABLE_GWP_ASAN OFF) endif () diff --git a/README.md b/README.md index 73d989210b5..dc253d4db2d 100644 --- a/README.md +++ b/README.md @@ -34,20 +34,18 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. -* [v24.5 Community Call](https://clickhouse.com/company/events/v24-5-community-release-call) - May 30 +* [v24.6 Community Call](https://clickhouse.com/company/events/v24-6-community-release-call) - Jul 2 ## Upcoming Events Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. 
-* [ClickHouse Happy Hour @ Tom's Watch Bar - Los Angeles](https://www.meetup.com/clickhouse-los-angeles-user-group/events/300740584/) - May 22 -* [ClickHouse & Confluent Meetup in Dubai](https://www.meetup.com/clickhouse-dubai-meetup-group/events/299629189/) - May 28 -* [ClickHouse Meetup in Stockholm](https://www.meetup.com/clickhouse-stockholm-user-group/events/299752651/) - Jun 3 -* [ClickHouse Meetup @ Cloudflare - San Francisco](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/300523061/) - Jun 4 -* [ClickHouse (クリックハウス) Meetup Tokyo](https://www.meetup.com/clickhouse-tokyo-user-group/events/300798053/) - Jun 5 +* [AWS Summit in DC](https://clickhouse.com/company/events/2024-06-aws-summit-dc) - Jun 26 * [ClickHouse Meetup in Amsterdam](https://www.meetup.com/clickhouse-netherlands-user-group/events/300781068/) - Jun 27 * [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9 +* [ClickHouse Cloud - Live Update Call](https://clickhouse.com/company/events/202407-cloud-update-live) - Jul 9 * [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9 +* [AWS Summit in New York](https://clickhouse.com/company/events/2024-07-awssummit-nyc) - Jul 10 * [ClickHouse Meetup @ Klaviyo - Boston](https://www.meetup.com/clickhouse-boston-user-group/events/300907870) - Jul 11 ## Recent Recordings diff --git a/SECURITY.md b/SECURITY.md index 8635951dc0e..53328b6e16b 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -14,6 +14,7 @@ The following versions of ClickHouse server are currently supported with securit | Version | Supported | |:-|:-| +| 24.6 | ✔️ | | 24.5 | ✔️ | | 24.4 | ✔️ | | 24.3 | ✔️ | diff --git a/base/base/EnumReflection.h b/base/base/EnumReflection.h index 4a9de4d17a3..e4e0ef672fd 100644 --- a/base/base/EnumReflection.h +++ b/base/base/EnumReflection.h @@ -32,7 +32,7 @@ constexpr void static_for(F && f) template struct 
fmt::formatter : fmt::formatter { - constexpr auto format(T value, auto& format_context) + constexpr auto format(T value, auto& format_context) const { return formatter::format(magic_enum::enum_name(value), format_context); } diff --git a/base/base/StringRef.h b/base/base/StringRef.h index 24af84626de..fc0674b8440 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include @@ -376,3 +378,5 @@ namespace PackedZeroTraits std::ostream & operator<<(std::ostream & os, const StringRef & str); + +template<> struct fmt::formatter : fmt::ostream_formatter {}; diff --git a/base/base/getFQDNOrHostName.cpp b/base/base/getFQDNOrHostName.cpp index 2a4ba8e2e11..6b3da9699b9 100644 --- a/base/base/getFQDNOrHostName.cpp +++ b/base/base/getFQDNOrHostName.cpp @@ -6,6 +6,9 @@ namespace { std::string getFQDNOrHostNameImpl() { +#if defined(OS_DARWIN) + return Poco::Net::DNS::hostName(); +#else try { return Poco::Net::DNS::thisHost().name(); @@ -14,6 +17,7 @@ namespace { return Poco::Net::DNS::hostName(); } +#endif } } diff --git a/base/base/wide_integer_to_string.h b/base/base/wide_integer_to_string.h index c2cbe8d82e3..f703a722afa 100644 --- a/base/base/wide_integer_to_string.h +++ b/base/base/wide_integer_to_string.h @@ -62,7 +62,7 @@ struct fmt::formatter> } template - auto format(const wide::integer & value, FormatContext & ctx) + auto format(const wide::integer & value, FormatContext & ctx) const { return fmt::format_to(ctx.out(), "{}", to_string(value)); } diff --git a/base/poco/Crypto/src/OpenSSLInitializer.cpp b/base/poco/Crypto/src/OpenSSLInitializer.cpp index 23447760b47..31798e8dd7e 100644 --- a/base/poco/Crypto/src/OpenSSLInitializer.cpp +++ b/base/poco/Crypto/src/OpenSSLInitializer.cpp @@ -23,9 +23,6 @@ #include #endif -#if __has_feature(address_sanitizer) -#include -#endif using Poco::RandomInputStream; using Poco::Thread; @@ -70,18 +67,12 @@ void OpenSSLInitializer::initialize() 
SSL_library_init(); SSL_load_error_strings(); OpenSSL_add_all_algorithms(); - + char seed[SEEDSIZE]; RandomInputStream rnd; rnd.read(seed, sizeof(seed)); - { -# if __has_feature(address_sanitizer) - /// Leak sanitizer (part of address sanitizer) thinks that a few bytes of memory in OpenSSL are allocated during but never released. - __lsan::ScopedDisabler lsan_disabler; -#endif - RAND_seed(seed, SEEDSIZE); - } - + RAND_seed(seed, SEEDSIZE); + int nMutexes = CRYPTO_num_locks(); _mutexes = new Poco::FastMutex[nMutexes]; CRYPTO_set_locking_callback(&OpenSSLInitializer::lock); @@ -89,8 +80,8 @@ void OpenSSLInitializer::initialize() // https://sourceforge.net/p/poco/bugs/110/ // // From http://www.openssl.org/docs/crypto/threads.html : -// "If the application does not register such a callback using CRYPTO_THREADID_set_callback(), -// then a default implementation is used - on Windows and BeOS this uses the system's +// "If the application does not register such a callback using CRYPTO_THREADID_set_callback(), +// then a default implementation is used - on Windows and BeOS this uses the system's // default thread identifying APIs" CRYPTO_set_id_callback(&OpenSSLInitializer::id); CRYPTO_set_dynlock_create_callback(&OpenSSLInitializer::dynlockCreate); @@ -109,7 +100,7 @@ void OpenSSLInitializer::uninitialize() CRYPTO_set_locking_callback(0); CRYPTO_set_id_callback(0); delete [] _mutexes; - + CONF_modules_free(); } } diff --git a/base/poco/Foundation/CMakeLists.txt b/base/poco/Foundation/CMakeLists.txt index dfb41a33fb1..324a0170bdd 100644 --- a/base/poco/Foundation/CMakeLists.txt +++ b/base/poco/Foundation/CMakeLists.txt @@ -213,6 +213,7 @@ target_compile_definitions (_poco_foundation ) target_include_directories (_poco_foundation SYSTEM PUBLIC "include") +target_link_libraries (_poco_foundation PRIVATE clickhouse_common_io) target_link_libraries (_poco_foundation PRIVATE diff --git a/base/poco/Foundation/include/Poco/ThreadPool.h 
b/base/poco/Foundation/include/Poco/ThreadPool.h index b9506cc5b7f..e2187bfeb66 100644 --- a/base/poco/Foundation/include/Poco/ThreadPool.h +++ b/base/poco/Foundation/include/Poco/ThreadPool.h @@ -48,7 +48,13 @@ class Foundation_API ThreadPool /// from the pool. { public: - ThreadPool(int minCapacity = 2, int maxCapacity = 16, int idleTime = 60, int stackSize = POCO_THREAD_STACK_SIZE); + explicit ThreadPool( + int minCapacity = 2, + int maxCapacity = 16, + int idleTime = 60, + int stackSize = POCO_THREAD_STACK_SIZE, + size_t global_profiler_real_time_period_ns_ = 0, + size_t global_profiler_cpu_time_period_ns_ = 0); /// Creates a thread pool with minCapacity threads. /// If required, up to maxCapacity threads are created /// a NoThreadAvailableException exception is thrown. @@ -56,8 +62,14 @@ public: /// and more than minCapacity threads are running, the thread /// is killed. Threads are created with given stack size. - ThreadPool( - const std::string & name, int minCapacity = 2, int maxCapacity = 16, int idleTime = 60, int stackSize = POCO_THREAD_STACK_SIZE); + explicit ThreadPool( + const std::string & name, + int minCapacity = 2, + int maxCapacity = 16, + int idleTime = 60, + int stackSize = POCO_THREAD_STACK_SIZE, + size_t global_profiler_real_time_period_ns_ = 0, + size_t global_profiler_cpu_time_period_ns_ = 0); /// Creates a thread pool with the given name and minCapacity threads. /// If required, up to maxCapacity threads are created /// a NoThreadAvailableException exception is thrown. 
@@ -171,6 +183,8 @@ private: int _serial; int _age; int _stackSize; + size_t _globalProfilerRealTimePeriodNs; + size_t _globalProfilerCPUTimePeriodNs; ThreadVec _threads; mutable FastMutex _mutex; }; diff --git a/base/poco/Foundation/src/ThreadPool.cpp b/base/poco/Foundation/src/ThreadPool.cpp index 6335ee82b47..f57c81e4128 100644 --- a/base/poco/Foundation/src/ThreadPool.cpp +++ b/base/poco/Foundation/src/ThreadPool.cpp @@ -20,6 +20,7 @@ #include "Poco/ErrorHandler.h" #include #include +#include namespace Poco { @@ -28,7 +29,11 @@ namespace Poco { class PooledThread: public Runnable { public: - PooledThread(const std::string& name, int stackSize = POCO_THREAD_STACK_SIZE); + explicit PooledThread( + const std::string& name, + int stackSize = POCO_THREAD_STACK_SIZE, + size_t globalProfilerRealTimePeriodNs_ = 0, + size_t globalProfilerCPUTimePeriodNs_ = 0); ~PooledThread(); void start(); @@ -51,16 +56,24 @@ private: Event _targetCompleted; Event _started; FastMutex _mutex; + size_t _globalProfilerRealTimePeriodNs; + size_t _globalProfilerCPUTimePeriodNs; }; -PooledThread::PooledThread(const std::string& name, int stackSize): - _idle(true), - _idleTime(0), - _pTarget(0), - _name(name), +PooledThread::PooledThread( + const std::string& name, + int stackSize, + size_t globalProfilerRealTimePeriodNs_, + size_t globalProfilerCPUTimePeriodNs_) : + _idle(true), + _idleTime(0), + _pTarget(0), + _name(name), _thread(name), - _targetCompleted(false) + _targetCompleted(false), + _globalProfilerRealTimePeriodNs(globalProfilerRealTimePeriodNs_), + _globalProfilerCPUTimePeriodNs(globalProfilerCPUTimePeriodNs_) { poco_assert_dbg (stackSize >= 0); _thread.setStackSize(stackSize); @@ -83,7 +96,7 @@ void PooledThread::start() void PooledThread::start(Thread::Priority priority, Runnable& target) { FastMutex::ScopedLock lock(_mutex); - + poco_assert (_pTarget == 0); _pTarget = ⌖ @@ -109,7 +122,7 @@ void PooledThread::start(Thread::Priority priority, Runnable& target, const std: } 
_thread.setName(fullName); _thread.setPriority(priority); - + poco_assert (_pTarget == 0); _pTarget = ⌖ @@ -145,7 +158,7 @@ void PooledThread::join() void PooledThread::activate() { FastMutex::ScopedLock lock(_mutex); - + poco_assert (_idle); _idle = false; _targetCompleted.reset(); @@ -155,7 +168,7 @@ void PooledThread::activate() void PooledThread::release() { const long JOIN_TIMEOUT = 10000; - + _mutex.lock(); _pTarget = 0; _mutex.unlock(); @@ -174,6 +187,10 @@ void PooledThread::release() void PooledThread::run() { + DB::ThreadStatus thread_status; + if (unlikely(_globalProfilerRealTimePeriodNs != 0 || _globalProfilerCPUTimePeriodNs != 0)) + thread_status.initGlobalProfiler(_globalProfilerRealTimePeriodNs, _globalProfilerCPUTimePeriodNs); + _started.set(); for (;;) { @@ -220,13 +237,17 @@ void PooledThread::run() ThreadPool::ThreadPool(int minCapacity, int maxCapacity, int idleTime, - int stackSize): - _minCapacity(minCapacity), - _maxCapacity(maxCapacity), + int stackSize, + size_t globalProfilerRealTimePeriodNs_, + size_t globalProfilerCPUTimePeriodNs_) : + _minCapacity(minCapacity), + _maxCapacity(maxCapacity), _idleTime(idleTime), _serial(0), _age(0), - _stackSize(stackSize) + _stackSize(stackSize), + _globalProfilerRealTimePeriodNs(globalProfilerRealTimePeriodNs_), + _globalProfilerCPUTimePeriodNs(globalProfilerCPUTimePeriodNs_) { poco_assert (minCapacity >= 1 && maxCapacity >= minCapacity && idleTime > 0); @@ -243,14 +264,18 @@ ThreadPool::ThreadPool(const std::string& name, int minCapacity, int maxCapacity, int idleTime, - int stackSize): + int stackSize, + size_t globalProfilerRealTimePeriodNs_, + size_t globalProfilerCPUTimePeriodNs_) : _name(name), - _minCapacity(minCapacity), - _maxCapacity(maxCapacity), + _minCapacity(minCapacity), + _maxCapacity(maxCapacity), _idleTime(idleTime), _serial(0), _age(0), - _stackSize(stackSize) + _stackSize(stackSize), + _globalProfilerRealTimePeriodNs(globalProfilerRealTimePeriodNs_), + 
_globalProfilerCPUTimePeriodNs(globalProfilerCPUTimePeriodNs_) { poco_assert (minCapacity >= 1 && maxCapacity >= minCapacity && idleTime > 0); @@ -393,15 +418,15 @@ void ThreadPool::housekeep() ThreadVec activeThreads; idleThreads.reserve(_threads.size()); activeThreads.reserve(_threads.size()); - + for (ThreadVec::iterator it = _threads.begin(); it != _threads.end(); ++it) { if ((*it)->idle()) { if ((*it)->idleTime() < _idleTime) idleThreads.push_back(*it); - else - expiredThreads.push_back(*it); + else + expiredThreads.push_back(*it); } else activeThreads.push_back(*it); } @@ -463,7 +488,7 @@ PooledThread* ThreadPool::createThread() { std::ostringstream name; name << _name << "[#" << ++_serial << "]"; - return new PooledThread(name.str(), _stackSize); + return new PooledThread(name.str(), _stackSize, _globalProfilerRealTimePeriodNs, _globalProfilerCPUTimePeriodNs); } @@ -481,7 +506,7 @@ public: ThreadPool* pool() { FastMutex::ScopedLock lock(_mutex); - + if (!_pPool) { _pPool = new ThreadPool("default"); @@ -490,7 +515,7 @@ public: } return _pPool; } - + private: ThreadPool* _pPool; FastMutex _mutex; diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h index e4037c87927..25dc133fb20 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h +++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h @@ -17,6 +17,7 @@ #ifndef NetSSL_SSLManager_INCLUDED #define NetSSL_SSLManager_INCLUDED +#include #include #include "Poco/BasicEvent.h" @@ -219,6 +220,13 @@ namespace Net /// Unless initializeClient() has been called, the first call to this method initializes the default Context /// from the application configuration. + Context::Ptr getCustomServerContext(const std::string & name); + /// Return custom Context used by the server. + + Context::Ptr setCustomServerContext(const std::string & name, Context::Ptr ctx); + /// Set custom Context used by the server. 
+ /// Return pointer on inserted Context or on old Context if exists. + PrivateKeyPassphraseHandlerPtr serverPassphraseHandler(); /// Returns the configured passphrase handler of the server. If none is set, the method will create a default one /// from an application configuration. @@ -258,6 +266,40 @@ namespace Net static const std::string CFG_SERVER_PREFIX; static const std::string CFG_CLIENT_PREFIX; + static const std::string CFG_PRIV_KEY_FILE; + static const std::string CFG_CERTIFICATE_FILE; + static const std::string CFG_CA_LOCATION; + static const std::string CFG_VER_MODE; + static const Context::VerificationMode VAL_VER_MODE; + static const std::string CFG_VER_DEPTH; + static const int VAL_VER_DEPTH; + static const std::string CFG_ENABLE_DEFAULT_CA; + static const bool VAL_ENABLE_DEFAULT_CA; + static const std::string CFG_CIPHER_LIST; + static const std::string CFG_CYPHER_LIST; // for backwards compatibility + static const std::string VAL_CIPHER_LIST; + static const std::string CFG_PREFER_SERVER_CIPHERS; + static const std::string CFG_DELEGATE_HANDLER; + static const std::string VAL_DELEGATE_HANDLER; + static const std::string CFG_CERTIFICATE_HANDLER; + static const std::string VAL_CERTIFICATE_HANDLER; + static const std::string CFG_CACHE_SESSIONS; + static const std::string CFG_SESSION_ID_CONTEXT; + static const std::string CFG_SESSION_CACHE_SIZE; + static const std::string CFG_SESSION_TIMEOUT; + static const std::string CFG_EXTENDED_VERIFICATION; + static const std::string CFG_REQUIRE_TLSV1; + static const std::string CFG_REQUIRE_TLSV1_1; + static const std::string CFG_REQUIRE_TLSV1_2; + static const std::string CFG_DISABLE_PROTOCOLS; + static const std::string CFG_DH_PARAMS_FILE; + static const std::string CFG_ECDH_CURVE; + +#ifdef OPENSSL_FIPS + static const std::string CFG_FIPS_MODE; + static const bool VAL_FIPS_MODE; +#endif + protected: static int verifyClientCallback(int ok, X509_STORE_CTX * pStore); /// The return value of this method defines how 
errors in @@ -314,39 +356,7 @@ namespace Net InvalidCertificateHandlerPtr _ptrClientCertificateHandler; Poco::FastMutex _mutex; - static const std::string CFG_PRIV_KEY_FILE; - static const std::string CFG_CERTIFICATE_FILE; - static const std::string CFG_CA_LOCATION; - static const std::string CFG_VER_MODE; - static const Context::VerificationMode VAL_VER_MODE; - static const std::string CFG_VER_DEPTH; - static const int VAL_VER_DEPTH; - static const std::string CFG_ENABLE_DEFAULT_CA; - static const bool VAL_ENABLE_DEFAULT_CA; - static const std::string CFG_CIPHER_LIST; - static const std::string CFG_CYPHER_LIST; // for backwards compatibility - static const std::string VAL_CIPHER_LIST; - static const std::string CFG_PREFER_SERVER_CIPHERS; - static const std::string CFG_DELEGATE_HANDLER; - static const std::string VAL_DELEGATE_HANDLER; - static const std::string CFG_CERTIFICATE_HANDLER; - static const std::string VAL_CERTIFICATE_HANDLER; - static const std::string CFG_CACHE_SESSIONS; - static const std::string CFG_SESSION_ID_CONTEXT; - static const std::string CFG_SESSION_CACHE_SIZE; - static const std::string CFG_SESSION_TIMEOUT; - static const std::string CFG_EXTENDED_VERIFICATION; - static const std::string CFG_REQUIRE_TLSV1; - static const std::string CFG_REQUIRE_TLSV1_1; - static const std::string CFG_REQUIRE_TLSV1_2; - static const std::string CFG_DISABLE_PROTOCOLS; - static const std::string CFG_DH_PARAMS_FILE; - static const std::string CFG_ECDH_CURVE; - -#ifdef OPENSSL_FIPS - static const std::string CFG_FIPS_MODE; - static const bool VAL_FIPS_MODE; -#endif + std::unordered_map _mapPtrServerContexts; friend class Poco::SingletonHolder; friend class Context; diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h index 49c12b6b45f..890752c52da 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h +++ 
b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h @@ -235,8 +235,6 @@ namespace Net /// Note that simply closing a socket is not sufficient /// to be able to re-use it again. - Poco::Timespan getMaxTimeout(); - private: SecureSocketImpl(const SecureSocketImpl &); SecureSocketImpl & operator=(const SecureSocketImpl &); @@ -250,6 +248,9 @@ namespace Net Session::Ptr _pSession; friend class SecureStreamSocketImpl; + + Poco::Timespan getMaxTimeoutOrLimit(); + //// Return max(send, receive) if non zero, otherwise maximum timeout }; diff --git a/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp b/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp index 7f6cc9abcb2..ae04a994786 100644 --- a/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp +++ b/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp @@ -330,27 +330,26 @@ void SSLManager::initDefaultContext(bool server) else _ptrDefaultClientContext->disableProtocols(disabledProtocols); - /// Temporarily disabled during the transition from boringssl to OpenSSL due to tsan issues. 
- /// bool cacheSessions = config.getBool(prefix + CFG_CACHE_SESSIONS, false); - /// if (server) - /// { - /// std::string sessionIdContext = config.getString(prefix + CFG_SESSION_ID_CONTEXT, config.getString("application.name", "")); - /// _ptrDefaultServerContext->enableSessionCache(cacheSessions, sessionIdContext); - /// if (config.hasProperty(prefix + CFG_SESSION_CACHE_SIZE)) - /// { - /// int cacheSize = config.getInt(prefix + CFG_SESSION_CACHE_SIZE); - /// _ptrDefaultServerContext->setSessionCacheSize(cacheSize); - /// } - /// if (config.hasProperty(prefix + CFG_SESSION_TIMEOUT)) - /// { - /// int timeout = config.getInt(prefix + CFG_SESSION_TIMEOUT); - /// _ptrDefaultServerContext->setSessionTimeout(timeout); - /// } - /// } - /// else - /// { - /// _ptrDefaultClientContext->enableSessionCache(cacheSessions); - /// } + bool cacheSessions = config.getBool(prefix + CFG_CACHE_SESSIONS, false); + if (server) + { + std::string sessionIdContext = config.getString(prefix + CFG_SESSION_ID_CONTEXT, config.getString("application.name", "")); + _ptrDefaultServerContext->enableSessionCache(cacheSessions, sessionIdContext); + if (config.hasProperty(prefix + CFG_SESSION_CACHE_SIZE)) + { + int cacheSize = config.getInt(prefix + CFG_SESSION_CACHE_SIZE); + _ptrDefaultServerContext->setSessionCacheSize(cacheSize); + } + if (config.hasProperty(prefix + CFG_SESSION_TIMEOUT)) + { + int timeout = config.getInt(prefix + CFG_SESSION_TIMEOUT); + _ptrDefaultServerContext->setSessionTimeout(timeout); + } + } + else + { + _ptrDefaultClientContext->enableSessionCache(cacheSessions); + } bool extendedVerification = config.getBool(prefix + CFG_EXTENDED_VERIFICATION, false); if (server) _ptrDefaultServerContext->enableExtendedCertificateVerification(extendedVerification); @@ -429,6 +428,23 @@ void SSLManager::initCertificateHandler(bool server) } +Context::Ptr SSLManager::getCustomServerContext(const std::string & name) +{ + Poco::FastMutex::ScopedLock lock(_mutex); + auto it = 
_mapPtrServerContexts.find(name); + if (it != _mapPtrServerContexts.end()) + return it->second; + return nullptr; +} + +Context::Ptr SSLManager::setCustomServerContext(const std::string & name, Context::Ptr ctx) +{ + Poco::FastMutex::ScopedLock lock(_mutex); + ctx = _mapPtrServerContexts.insert({name, ctx}).first->second; + return ctx; +} + + Poco::Util::AbstractConfiguration& SSLManager::appConfig() { try diff --git a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp index efe25f65909..4873d259ae5 100644 --- a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp +++ b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp @@ -199,7 +199,7 @@ void SecureSocketImpl::connectSSL(bool performHandshake) if (performHandshake && _pSocket->getBlocking()) { int ret; - Poco::Timespan remaining_time = getMaxTimeout(); + Poco::Timespan remaining_time = getMaxTimeoutOrLimit(); do { RemainingTimeCounter counter(remaining_time); @@ -302,7 +302,7 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags) return rc; } - Poco::Timespan remaining_time = getMaxTimeout(); + Poco::Timespan remaining_time = getMaxTimeoutOrLimit(); do { RemainingTimeCounter counter(remaining_time); @@ -338,7 +338,7 @@ int SecureSocketImpl::receiveBytes(void* buffer, int length, int flags) return rc; } - Poco::Timespan remaining_time = getMaxTimeout(); + Poco::Timespan remaining_time = getMaxTimeoutOrLimit(); do { /// SSL record may consist of several TCP packets, @@ -372,7 +372,7 @@ int SecureSocketImpl::completeHandshake() poco_check_ptr (_pSSL); int rc; - Poco::Timespan remaining_time = getMaxTimeout(); + Poco::Timespan remaining_time = getMaxTimeoutOrLimit(); do { RemainingTimeCounter counter(remaining_time); @@ -453,18 +453,29 @@ X509* SecureSocketImpl::peerCertificate() const return 0; } -Poco::Timespan SecureSocketImpl::getMaxTimeout() +Poco::Timespan SecureSocketImpl::getMaxTimeoutOrLimit() { std::lock_guard lock(_mutex); Poco::Timespan 
remaining_time = _pSocket->getReceiveTimeout(); Poco::Timespan send_timeout = _pSocket->getSendTimeout(); if (remaining_time < send_timeout) remaining_time = send_timeout; + /// zero SO_SNDTIMEO/SO_RCVTIMEO works as no timeout, let's replicate this + /// + /// NOTE: we cannot use INT64_MAX (std::numeric_limits::max()), + /// since it will be later passed to poll() which accept int timeout, and + /// even though poll() accepts milliseconds and Timespan() accepts + /// microseconds, let's use smaller maximum value just to avoid some possible + /// issues, this should be enough anyway (it is ~24 days). + if (remaining_time == 0) + remaining_time = Poco::Timespan(std::numeric_limits::max()); return remaining_time; } bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time) { + if (remaining_time == 0) + return false; std::lock_guard lock(_mutex); if (rc <= 0) { @@ -475,9 +486,7 @@ bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time) case SSL_ERROR_WANT_READ: if (_pSocket->getBlocking()) { - /// Level-triggered mode of epoll_wait is used, so if SSL_read don't read all available data from socket, - /// epoll_wait returns true without waiting for new data even if remaining_time == 0 - if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_READ) && remaining_time != 0) + if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_READ)) return true; else throw Poco::TimeoutException(); @@ -486,13 +495,15 @@ bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time) case SSL_ERROR_WANT_WRITE: if (_pSocket->getBlocking()) { - /// The same as for SSL_ERROR_WANT_READ - if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_WRITE) && remaining_time != 0) + if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_WRITE)) return true; else throw Poco::TimeoutException(); } break; + /// NOTE: POCO_EINTR is the same as SSL_ERROR_WANT_READ (at least in + /// OpenSSL), so this likely dead code, but 
let's leave it for + /// compatibility with other implementations case SSL_ERROR_SYSCALL: return socketError == POCO_EAGAIN || socketError == POCO_EINTR; default: diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index dfbbb66a1e9..bb776fa9506 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -1,12 +1,12 @@ # This variables autochanged by tests/ci/version_helper.py: -# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, +# NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54487) +SET(VERSION_REVISION 54488) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 6) +SET(VERSION_MINOR 7) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 70a1d3a63d47f0be077d67b8deb907230fc7cfb0) -SET(VERSION_DESCRIBE v24.6.1.1-testing) -SET(VERSION_STRING 24.6.1.1) +SET(VERSION_GITHASH aa023477a9265e403982fca5ee29a714db5133d9) +SET(VERSION_DESCRIBE v24.7.1.1-testing) +SET(VERSION_STRING 24.7.1.1) # end of autochange diff --git a/cmake/instrument.cmake b/cmake/xray_instrumentation.cmake similarity index 91% rename from cmake/instrument.cmake rename to cmake/xray_instrumentation.cmake index bd2fb4d45fc..661c0575e54 100644 --- a/cmake/instrument.cmake +++ b/cmake/xray_instrumentation.cmake @@ -7,7 +7,7 @@ if (NOT ENABLE_XRAY) return() endif() -if (NOT (ARCH_AMD64 AND (OS_LINUX OR OS_FREEBSD))) +if (NOT (ARCH_AMD64 AND OS_LINUX)) message (STATUS "Not using LLVM XRay, only amd64 Linux or FreeBSD are supported") return() endif() diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index 3bd86026c93..a3c4dd3e77f 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit 3bd86026c93da5a40006fd53403dff9d5f5e30e3 +Subproject commit a3c4dd3e77f28b526efbb0eb394b72e29c633936 diff --git a/contrib/abseil-cpp-cmake/CMakeLists.txt b/contrib/abseil-cpp-cmake/CMakeLists.txt index 7372195bb0d..4137547b736 
100644 --- a/contrib/abseil-cpp-cmake/CMakeLists.txt +++ b/contrib/abseil-cpp-cmake/CMakeLists.txt @@ -1,6 +1,8 @@ set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") set(ABSL_COMMON_INCLUDE_DIRS "${ABSL_ROOT_DIR}") +# This is a minimized version of the function definition in CMake/AbseilHelpers.cmake + # # Copyright 2017 The Abseil Authors. # @@ -16,7 +18,6 @@ set(ABSL_COMMON_INCLUDE_DIRS "${ABSL_ROOT_DIR}") # See the License for the specific language governing permissions and # limitations under the License. # - function(absl_cc_library) cmake_parse_arguments(ABSL_CC_LIB "DISABLE_INSTALL;PUBLIC;TESTONLY" @@ -76,6 +77,12 @@ function(absl_cc_library) add_library(absl::${ABSL_CC_LIB_NAME} ALIAS ${_NAME}) endfunction() +# The following definitions are an amalgamation of the CMakeLists.txt files in absl/*/ +# To refresh them when upgrading to a new version: +# - copy them over from upstream +# - remove calls of 'absl_cc_test' +# - remove calls of `absl_cc_library` that contain `TESTONLY` +# - append '${DIR}' to the file definitions set(DIR ${ABSL_ROOT_DIR}/absl/algorithm) @@ -102,12 +109,12 @@ absl_cc_library( absl::algorithm absl::core_headers absl::meta + absl::nullability PUBLIC ) set(DIR ${ABSL_ROOT_DIR}/absl/base) -# Internal-only target, do not depend on directly. 
absl_cc_library( NAME atomic_hook @@ -146,6 +153,18 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} ) +absl_cc_library( + NAME + no_destructor + HDRS + "${DIR}/no_destructor.h" + DEPS + absl::config + absl::nullability + COPTS + ${ABSL_DEFAULT_COPTS} +) + absl_cc_library( NAME nullability @@ -305,6 +324,8 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} LINKOPTS ${ABSL_DEFAULT_LINKOPTS} + $<$:-lrt> + $<$:-ladvapi32> DEPS absl::atomic_hook absl::base_internal @@ -312,6 +333,7 @@ absl_cc_library( absl::core_headers absl::dynamic_annotations absl::log_severity + absl::nullability absl::raw_logging_internal absl::spinlock_wait absl::type_traits @@ -357,6 +379,7 @@ absl_cc_library( absl::base absl::config absl::core_headers + absl::nullability PUBLIC ) @@ -467,10 +490,11 @@ absl_cc_library( LINKOPTS ${ABSL_DEFAULT_LINKOPTS} DEPS - absl::container_common absl::common_policy_traits absl::compare absl::compressed_tuple + absl::config + absl::container_common absl::container_memory absl::cord absl::core_headers @@ -480,7 +504,6 @@ absl_cc_library( absl::strings absl::throw_delegate absl::type_traits - absl::utility ) # Internal-only target, do not depend on directly. @@ -523,7 +546,9 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::base_internal absl::compressed_tuple + absl::config absl::core_headers absl::memory absl::span @@ -548,18 +573,6 @@ absl_cc_library( PUBLIC ) -# Internal-only target, do not depend on directly. 
-absl_cc_library( - NAME - counting_allocator - HDRS - "${DIR}/internal/counting_allocator.h" - COPTS - ${ABSL_DEFAULT_COPTS} - DEPS - absl::config -) - absl_cc_library( NAME flat_hash_map @@ -570,7 +583,7 @@ absl_cc_library( DEPS absl::container_memory absl::core_headers - absl::hash_function_defaults + absl::hash_container_defaults absl::raw_hash_map absl::algorithm_container absl::memory @@ -586,7 +599,7 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} DEPS absl::container_memory - absl::hash_function_defaults + absl::hash_container_defaults absl::raw_hash_set absl::algorithm_container absl::core_headers @@ -604,7 +617,7 @@ absl_cc_library( DEPS absl::container_memory absl::core_headers - absl::hash_function_defaults + absl::hash_container_defaults absl::node_slot_policy absl::raw_hash_map absl::algorithm_container @@ -620,8 +633,9 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::container_memory absl::core_headers - absl::hash_function_defaults + absl::hash_container_defaults absl::node_slot_policy absl::raw_hash_set absl::algorithm_container @@ -629,6 +643,19 @@ absl_cc_library( PUBLIC ) +absl_cc_library( + NAME + hash_container_defaults + HDRS + "${DIR}/hash_container_defaults.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config + absl::hash_function_defaults + PUBLIC +) + # Internal-only target, do not depend on directly. 
absl_cc_library( NAME @@ -655,9 +682,11 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} DEPS absl::config + absl::container_common absl::cord absl::hash absl::strings + absl::type_traits PUBLIC ) @@ -703,6 +732,7 @@ absl_cc_library( absl::base absl::config absl::exponential_biased + absl::no_destructor absl::raw_logging_internal absl::sample_recorder absl::synchronization @@ -756,7 +786,9 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::config absl::container_memory + absl::core_headers absl::raw_hash_set absl::throw_delegate PUBLIC @@ -817,6 +849,7 @@ absl_cc_library( DEPS absl::config absl::core_headers + absl::debugging_internal absl::meta absl::strings absl::span @@ -931,6 +964,7 @@ absl_cc_library( absl::crc32c absl::config absl::strings + absl::no_destructor ) set(DIR ${ABSL_ROOT_DIR}/absl/debugging) @@ -954,6 +988,8 @@ absl_cc_library( "${DIR}/stacktrace.cc" COPTS ${ABSL_DEFAULT_COPTS} + LINKOPTS + $<$:${EXECINFO_LIBRARY}> DEPS absl::debugging_internal absl::config @@ -980,6 +1016,7 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} LINKOPTS ${ABSL_DEFAULT_LINKOPTS} + $<$:-ldbghelp> DEPS absl::debugging_internal absl::demangle_internal @@ -1058,8 +1095,10 @@ absl_cc_library( demangle_internal HDRS "${DIR}/internal/demangle.h" + "${DIR}/internal/demangle_rust.h" SRCS "${DIR}/internal/demangle.cc" + "${DIR}/internal/demangle_rust.cc" COPTS ${ABSL_DEFAULT_COPTS} DEPS @@ -1252,6 +1291,7 @@ absl_cc_library( absl::strings absl::synchronization absl::flat_hash_map + absl::no_destructor ) # Internal-only target, do not depend on directly. 
@@ -1283,12 +1323,9 @@ absl_cc_library( absl_cc_library( NAME flags - SRCS - "${DIR}/flag.cc" HDRS "${DIR}/declare.h" "${DIR}/flag.h" - "${DIR}/internal/flag_msvc.inc" COPTS ${ABSL_DEFAULT_COPTS} LINKOPTS @@ -1299,7 +1336,6 @@ absl_cc_library( absl::flags_config absl::flags_internal absl::flags_reflection - absl::base absl::core_headers absl::strings ) @@ -1379,6 +1415,9 @@ absl_cc_library( absl::synchronization ) +############################################################################ +# Unit tests in alphabetical order. + set(DIR ${ABSL_ROOT_DIR}/absl/functional) absl_cc_library( @@ -1431,6 +1470,18 @@ absl_cc_library( PUBLIC ) +absl_cc_library( + NAME + overload + HDRS + "${DIR}/overload.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::meta + PUBLIC +) + set(DIR ${ABSL_ROOT_DIR}/absl/hash) absl_cc_library( @@ -1640,6 +1691,7 @@ absl_cc_library( absl::log_internal_conditions absl::log_internal_message absl::log_internal_strip + absl::absl_vlog_is_on ) absl_cc_library( @@ -1721,6 +1773,7 @@ absl_cc_library( absl::log_entry absl::log_severity absl::log_sink + absl::no_destructor absl::raw_logging_internal absl::synchronization absl::span @@ -1771,6 +1824,7 @@ absl_cc_library( LINKOPTS ${ABSL_DEFAULT_LINKOPTS} DEPS + absl::core_headers absl::log_internal_message absl::log_internal_nullstream absl::log_severity @@ -1876,6 +1930,11 @@ absl_cc_library( PUBLIC ) +# Warning: Many linkers will strip the contents of this library because its +# symbols are only used in a global constructor. A workaround is for clients +# to link this using $<LINK_LIBRARY:WHOLE_ARCHIVE,absl::log_flags> instead of +# the plain absl::log_flags. +# TODO(b/320467376): Implement the equivalent of Bazel's alwayslink=True.
absl_cc_library( NAME log_flags @@ -1897,6 +1956,7 @@ absl_cc_library( absl::flags absl::flags_marshalling absl::strings + absl::vlog_config_internal PUBLIC ) @@ -1919,6 +1979,7 @@ absl_cc_library( absl::log_severity absl::raw_logging_internal absl::strings + absl::vlog_config_internal ) absl_cc_library( @@ -1952,6 +2013,7 @@ absl_cc_library( ${ABSL_DEFAULT_LINKOPTS} DEPS absl::log_internal_log_impl + absl::vlog_is_on PUBLIC ) @@ -2064,21 +2126,75 @@ absl_cc_library( ) absl_cc_library( - NAME - log_internal_fnmatch - SRCS - "${DIR}/internal/fnmatch.cc" - HDRS - "${DIR}/internal/fnmatch.h" - COPTS - ${ABSL_DEFAULT_COPTS} - LINKOPTS - ${ABSL_DEFAULT_LINKOPTS} - DEPS - absl::config - absl::strings + NAME + vlog_config_internal + SRCS + "${DIR}/internal/vlog_config.cc" + HDRS + "${DIR}/internal/vlog_config.h" + COPTS + ${ABSL_DEFAULT_COPTS} + LINKOPTS + ${ABSL_DEFAULT_LINKOPTS} + DEPS + absl::base + absl::config + absl::core_headers + absl::log_internal_fnmatch + absl::memory + absl::no_destructor + absl::strings + absl::synchronization + absl::optional ) +absl_cc_library( + NAME + absl_vlog_is_on + COPTS + ${ABSL_DEFAULT_COPTS} + LINKOPTS + ${ABSL_DEFAULT_LINKOPTS} + HDRS + "${DIR}/absl_vlog_is_on.h" + DEPS + absl::vlog_config_internal + absl::config + absl::core_headers + absl::strings +) + +absl_cc_library( + NAME + vlog_is_on + COPTS + ${ABSL_DEFAULT_COPTS} + LINKOPTS + ${ABSL_DEFAULT_LINKOPTS} + HDRS + "${DIR}/vlog_is_on.h" + DEPS + absl::absl_vlog_is_on +) + +absl_cc_library( + NAME + log_internal_fnmatch + SRCS + "${DIR}/internal/fnmatch.cc" + HDRS + "${DIR}/internal/fnmatch.h" + COPTS + ${ABSL_DEFAULT_COPTS} + LINKOPTS + ${ABSL_DEFAULT_LINKOPTS} + DEPS + absl::config + absl::strings +) + +# Test targets + set(DIR ${ABSL_ROOT_DIR}/absl/memory) absl_cc_library( @@ -2147,6 +2263,7 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::compare absl::config absl::core_headers absl::bits @@ -2176,6 +2293,8 @@ absl_cc_library( PUBLIC ) +set(DIR 
${ABSL_ROOT_DIR}/absl/profiling) + absl_cc_library( NAME sample_recorder @@ -2188,8 +2307,6 @@ absl_cc_library( absl::synchronization ) -set(DIR ${ABSL_ROOT_DIR}/absl/profiling) - absl_cc_library( NAME exponential_biased @@ -2265,6 +2382,7 @@ absl_cc_library( LINKOPTS ${ABSL_DEFAULT_LINKOPTS} DEPS + absl::config absl::fast_type_id absl::optional ) @@ -2336,11 +2454,13 @@ absl_cc_library( DEPS absl::config absl::inlined_vector + absl::nullability absl::random_internal_pool_urbg absl::random_internal_salted_seed_seq absl::random_internal_seed_material absl::random_seed_gen_exception absl::span + absl::string_view ) # Internal-only target, do not depend on directly. @@ -2399,6 +2519,7 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} LINKOPTS ${ABSL_DEFAULT_LINKOPTS} + $<$:-lbcrypt> DEPS absl::core_headers absl::optional @@ -2658,6 +2779,29 @@ absl_cc_library( absl::config ) +# Internal-only target, do not depend on directly. +absl_cc_library( + NAME + random_internal_distribution_test_util + SRCS + "${DIR}/internal/chi_square.cc" + "${DIR}/internal/distribution_test_util.cc" + HDRS + "${DIR}/internal/chi_square.h" + "${DIR}/internal/distribution_test_util.h" + COPTS + ${ABSL_DEFAULT_COPTS} + LINKOPTS + ${ABSL_DEFAULT_LINKOPTS} + DEPS + absl::config + absl::core_headers + absl::raw_logging_internal + absl::strings + absl::str_format + absl::span +) + # Internal-only target, do not depend on directly. 
absl_cc_library( NAME @@ -2699,6 +2843,8 @@ absl_cc_library( absl::function_ref absl::inlined_vector absl::memory + absl::no_destructor + absl::nullability absl::optional absl::raw_logging_internal absl::span @@ -2724,8 +2870,11 @@ absl_cc_library( absl::base absl::config absl::core_headers + absl::has_ostream_operator + absl::nullability absl::raw_logging_internal absl::status + absl::str_format absl::strings absl::type_traits absl::utility @@ -2748,6 +2897,7 @@ absl_cc_library( absl::base absl::config absl::core_headers + absl::nullability absl::throw_delegate PUBLIC ) @@ -2762,6 +2912,7 @@ absl_cc_library( "${DIR}/has_absl_stringify.h" "${DIR}/internal/damerau_levenshtein_distance.h" "${DIR}/internal/string_constant.h" + "${DIR}/internal/has_absl_stringify.h" "${DIR}/match.h" "${DIR}/numbers.h" "${DIR}/str_cat.h" @@ -2805,6 +2956,7 @@ absl_cc_library( absl::endian absl::int128 absl::memory + absl::nullability absl::raw_logging_internal absl::throw_delegate absl::type_traits @@ -2824,6 +2976,18 @@ absl_cc_library( PUBLIC ) +absl_cc_library( + NAME + has_ostream_operator + HDRS + "${DIR}/has_ostream_operator.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config + PUBLIC +) + # Internal-only target, do not depend on directly. 
absl_cc_library( NAME @@ -2855,7 +3019,12 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::config + absl::core_headers + absl::nullability + absl::span absl::str_format_internal + absl::string_view PUBLIC ) @@ -2886,6 +3055,7 @@ absl_cc_library( absl::strings absl::config absl::core_headers + absl::fixed_array absl::inlined_vector absl::numeric_representation absl::type_traits @@ -2989,6 +3159,7 @@ absl_cc_library( DEPS absl::base absl::config + absl::no_destructor absl::raw_logging_internal absl::synchronization ) @@ -3079,6 +3250,7 @@ absl_cc_library( absl::endian absl::function_ref absl::inlined_vector + absl::nullability absl::optional absl::raw_logging_internal absl::span @@ -3246,6 +3418,8 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} DEPS Threads::Threads + # TODO(#1495): Use $ once our + # minimum CMake version >= 3.24 $<$:-Wl,-framework,CoreFoundation> ) @@ -3286,8 +3460,8 @@ absl_cc_library( NAME bad_any_cast_impl SRCS - "${DIR}/bad_any_cast.h" - "${DIR}/bad_any_cast.cc" + "${DIR}/bad_any_cast.h" + "${DIR}/bad_any_cast.cc" COPTS ${ABSL_DEFAULT_COPTS} DEPS @@ -3307,6 +3481,7 @@ absl_cc_library( DEPS absl::algorithm absl::core_headers + absl::nullability absl::throw_delegate absl::type_traits PUBLIC @@ -3327,6 +3502,7 @@ absl_cc_library( absl::config absl::core_headers absl::memory + absl::nullability absl::type_traits absl::utility PUBLIC @@ -3389,6 +3565,7 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::config absl::core_headers absl::type_traits PUBLIC diff --git a/contrib/aws b/contrib/aws index deeaa9e7c5f..1c2946bfcb7 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit deeaa9e7c5fe690e3dacc4005d7ecfa7a66a32bb +Subproject commit 1c2946bfcb7f1e3ae0a858de0b59d4f1a7b4ccaf diff --git a/contrib/cld2 b/contrib/cld2 index bc6d493a2f6..217ba8b8805 160000 --- a/contrib/cld2 +++ b/contrib/cld2 @@ -1 +1 @@ -Subproject commit bc6d493a2f64ed1fc1c4c4b4294a542a04e04217 +Subproject commit 
217ba8b8805b41557faadaa47bb6e99f2242eea3 diff --git a/contrib/fmtlib b/contrib/fmtlib index b6f4ceaed0a..a33701196ad 160000 --- a/contrib/fmtlib +++ b/contrib/fmtlib @@ -1 +1 @@ -Subproject commit b6f4ceaed0a0a24ccf575fab6c56dd50ccf6f1a9 +Subproject commit a33701196adfad74917046096bf5a2aa0ab0bb50 diff --git a/contrib/fmtlib-cmake/CMakeLists.txt b/contrib/fmtlib-cmake/CMakeLists.txt index fe399ddc6e1..6625e411295 100644 --- a/contrib/fmtlib-cmake/CMakeLists.txt +++ b/contrib/fmtlib-cmake/CMakeLists.txt @@ -13,7 +13,6 @@ set (SRCS ${FMT_SOURCE_DIR}/include/fmt/core.h ${FMT_SOURCE_DIR}/include/fmt/format.h ${FMT_SOURCE_DIR}/include/fmt/format-inl.h - ${FMT_SOURCE_DIR}/include/fmt/locale.h ${FMT_SOURCE_DIR}/include/fmt/os.h ${FMT_SOURCE_DIR}/include/fmt/ostream.h ${FMT_SOURCE_DIR}/include/fmt/printf.h diff --git a/contrib/googletest b/contrib/googletest index e47544ad31c..a7f443b80b1 160000 --- a/contrib/googletest +++ b/contrib/googletest @@ -1 +1 @@ -Subproject commit e47544ad31cb3ceecd04cc13e8fe556f8df9fe0b +Subproject commit a7f443b80b105f940225332ed3c31f2790092f47 diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index b633f0fda50..023fdcf103a 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -34,7 +34,7 @@ if (OS_LINUX) # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. 
- set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") endif() diff --git a/contrib/mariadb-connector-c b/contrib/mariadb-connector-c index e39608998f5..d0a788c5b9f 160000 --- a/contrib/mariadb-connector-c +++ b/contrib/mariadb-connector-c @@ -1 +1 @@ -Subproject commit e39608998f5f6944ece9ec61f48e9172ec1de660 +Subproject commit d0a788c5b9fcaca2368d9233770d3ca91ea79f88 diff --git a/contrib/openssl b/contrib/openssl index f7b8721dfc6..5d81fa7068f 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit f7b8721dfc66abb147f24ca07b9c9d1d64f40f71 +Subproject commit 5d81fa7068fc8c07f4d0997d5b703f3c541a637c diff --git a/contrib/orc b/contrib/orc index e24f2c2a3ca..947cebaf943 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit e24f2c2a3ca0769c96704ab20ad6f512a83ea2ad +Subproject commit 947cebaf9432d708253ac08dc3012daa6b4ede6f diff --git a/contrib/re2 b/contrib/re2 index a807e8a3aac..85dd7ad833a 160000 --- a/contrib/re2 +++ b/contrib/re2 @@ -1 +1 @@ -Subproject commit a807e8a3aac2cc33c77b7071efea54fcabe38e0c +Subproject commit 85dd7ad833a73095ecf3e3baea608ba051bbe2c7 diff --git a/contrib/re2-cmake/CMakeLists.txt b/contrib/re2-cmake/CMakeLists.txt index f773bc65a69..99d61839b30 100644 --- a/contrib/re2-cmake/CMakeLists.txt +++ b/contrib/re2-cmake/CMakeLists.txt @@ -28,16 +28,20 @@ set(RE2_SOURCES add_library(_re2 ${RE2_SOURCES}) target_include_directories(_re2 PUBLIC "${SRC_DIR}") target_link_libraries(_re2 PRIVATE + absl::absl_check + absl::absl_log absl::base absl::core_headers absl::fixed_array + absl::flags absl::flat_hash_map absl::flat_hash_set + absl::hash absl::inlined_vector - absl::strings - 
absl::str_format - absl::synchronization absl::optional - absl::span) + absl::span + absl::str_format + absl::strings + absl::synchronization) add_library(ch_contrib::re2 ALIAS _re2) diff --git a/docker/images.json b/docker/images.json index 7439517379b..716b76ee217 100644 --- a/docker/images.json +++ b/docker/images.json @@ -41,8 +41,7 @@ "docker/test/stateless": { "name": "clickhouse/stateless-test", "dependent": [ - "docker/test/stateful", - "docker/test/unit" + "docker/test/stateful" ] }, "docker/test/stateful": { @@ -122,15 +121,16 @@ "docker/test/base": { "name": "clickhouse/test-base", "dependent": [ + "docker/test/clickbench", "docker/test/fuzzer", - "docker/test/libfuzzer", "docker/test/integration/base", "docker/test/keeper-jepsen", + "docker/test/libfuzzer", "docker/test/server-jepsen", "docker/test/sqllogic", "docker/test/sqltest", - "docker/test/clickbench", - "docker/test/stateless" + "docker/test/stateless", + "docker/test/unit" ] }, "docker/test/integration/kerberized_hadoop": { diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index b3271d94184..018fe57bf56 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.5.1.1763" +ARG VERSION="24.6.1.4423" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 3f3b880c8f3..a86406e5129 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.5.1.1763" +ARG VERSION="24.6.1.4423" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 5fd22ee9b51..25f3273a648 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.5.1.1763" +ARG VERSION="24.6.1.4423" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index b8f967ed9c2..6191aeaf304 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -208,6 +208,7 @@ handle SIGPIPE nostop noprint pass handle SIGTERM nostop noprint pass handle SIGUSR1 nostop noprint pass handle SIGUSR2 nostop noprint pass +handle SIGSEGV nostop pass handle SIG$RTMIN nostop noprint pass info signals continue diff --git a/docker/test/stateless/attach_gdb.lib b/docker/test/stateless/attach_gdb.lib index d288288bb17..eb54f920b98 100644 --- a/docker/test/stateless/attach_gdb.lib +++ b/docker/test/stateless/attach_gdb.lib @@ -20,6 +20,7 @@ handle SIGPIPE nostop noprint pass handle SIGTERM nostop noprint pass handle SIGUSR1 nostop noprint pass handle SIGUSR2 nostop noprint pass +handle SIGSEGV nostop pass handle SIG$RTMIN nostop noprint pass info signals continue diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 4d2c2e6f466..7d6499cef5e 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -253,8 +253,8 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ - 
--test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ + timeout -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt set -e @@ -285,7 +285,7 @@ stop_logs_replication # Try to get logs while server is running failed_to_save_logs=0 -for table in query_log zookeeper_log trace_log transactions_info_log metric_log +for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log do err=$(clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.gz' format TSVWithNamesAndTypes") echo "$err" @@ -339,7 +339,7 @@ if [ $failed_to_save_logs -ne 0 ]; then # directly # - even though ci auto-compress some files (but not *.tsv) it does this only # for files >64MB, we want this files to be compressed explicitly - for table in query_log zookeeper_log trace_log transactions_info_log metric_log + for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log do clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then @@ -379,6 +379,10 @@ fi tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: +rm -rf /var/lib/clickhouse/data/system/*/ +tar -chf /test_output/store.tar /var/lib/clickhouse/store ||: +tar -chf /test_output/metadata.tar /var/lib/clickhouse/metadata/*.sql ||: + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then rg -Fa "" /var/log/clickhouse-server/clickhouse-server1.log ||: rg -Fa "" /var/log/clickhouse-server/clickhouse-server2.log ||: diff --git 
a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index 3b6ad244c82..c069ccbdd8d 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -89,10 +89,6 @@ function configure() # since we run clickhouse from root sudo chown root: /var/lib/clickhouse - # Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM). - echo "1" \ - > /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml - local total_mem total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB total_mem=$(( total_mem*1024 )) # bytes diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index cb29185f068..7cd712b73f6 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -10,14 +10,15 @@ RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ aspell \ curl \ - git \ - gh \ file \ + gh \ + git \ libxml2-utils \ + locales \ moreutils \ python3-pip \ yamllint \ - locales \ + zstd \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* @@ -30,10 +31,13 @@ RUN pip3 install \ mypy==1.8.0 \ pylint==3.1.0 \ python-magic==0.4.24 \ + flake8==4.0.1 \ requests \ thefuzz \ + tqdm==4.66.4 \ types-requests \ unidiff \ + jwt \ && rm -rf /root/.cache/pip RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index cc6cb292b66..64803191532 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -9,6 +9,8 @@ echo "Check style" | ts ./check-style -n |& tee /test_output/style_output.txt echo "Check python formatting with black" | ts ./check-black -n |& tee /test_output/black_output.txt +echo "Check python with flake8" | ts +./check-flake8 |& tee /test_output/flake8_output.txt echo 
"Check python type hinting with mypy" | ts ./check-mypy -n |& tee /test_output/mypy_output.txt echo "Check typos" | ts diff --git a/docker/test/unit/Dockerfile b/docker/test/unit/Dockerfile index cf5ba1eec7f..af44dc930b2 100644 --- a/docker/test/unit/Dockerfile +++ b/docker/test/unit/Dockerfile @@ -1,9 +1,7 @@ # rebuild in #33610 # docker build -t clickhouse/unit-test . ARG FROM_TAG=latest -FROM clickhouse/stateless-test:$FROM_TAG - -RUN apt-get install gdb +FROM clickhouse/test-base:$FROM_TAG COPY run.sh / CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 1f2cc9903b2..a4c4c75e5b3 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -25,7 +25,8 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & ./setup_minio.sh stateless # to have a proper environment echo "Get previous release tag" -previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb | grep "Version: " | awk '{print $2}' | cut -f1 -d'+' | get_previous_release_tag) +# shellcheck disable=SC2016 +previous_release_tag=$(dpkg-deb --showformat='${Version}' --show package_folder/clickhouse-client*.deb | get_previous_release_tag) echo $previous_release_tag echo "Clone previous release repository" diff --git a/docs/changelogs/v23.8.15.35-lts.md b/docs/changelogs/v23.8.15.35-lts.md new file mode 100644 index 00000000000..bab5c507fe8 --- /dev/null +++ b/docs/changelogs/v23.8.15.35-lts.md @@ -0,0 +1,40 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.8.15.35-lts (060ff8e813a) FIXME as compared to v23.8.14.6-lts (967e51c1d6b) + +#### Build/Testing/Packaging Improvement +* Backported in [#63621](https://github.com/ClickHouse/ClickHouse/issues/63621): The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* Backported in [#65153](https://github.com/ClickHouse/ClickHouse/issues/65153): Decrease the `unit-test` image a few times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Backported in [#64422](https://github.com/ClickHouse/ClickHouse/issues/64422): Fixes [#59989](https://github.com/ClickHouse/ClickHouse/issues/59989): runs init scripts when force-enabled or when no database exists, rather than the inverse. [#59991](https://github.com/ClickHouse/ClickHouse/pull/59991) ([jktng](https://github.com/jktng)). +* Backported in [#64016](https://github.com/ClickHouse/ClickHouse/issues/64016): Fix "Invalid storage definition in metadata file" for parameterized views. [#60708](https://github.com/ClickHouse/ClickHouse/pull/60708) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#63456](https://github.com/ClickHouse/ClickHouse/issues/63456): Fix the issue where the function `addDays` (and similar functions) reports an error when the first parameter is `DateTime64`. [#61561](https://github.com/ClickHouse/ClickHouse/pull/61561) ([Shuai li](https://github.com/loneylee)). +* Backported in [#63289](https://github.com/ClickHouse/ClickHouse/issues/63289): Fix crash with untuple and unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#63512](https://github.com/ClickHouse/ClickHouse/issues/63512): Fix `X-ClickHouse-Timezone` header returning wrong timezone when using `session_timezone` as query level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)). 
+* Backported in [#63902](https://github.com/ClickHouse/ClickHouse/issues/63902): `query_plan_remove_redundant_distinct` can break queries with WINDOW FUNCTIONS (when `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)). +* Backported in [#64104](https://github.com/ClickHouse/ClickHouse/issues/64104): Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#64265](https://github.com/ClickHouse/ClickHouse/issues/64265): Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#64867](https://github.com/ClickHouse/ClickHouse/issues/64867): Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via http protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)). + +#### NO CL CATEGORY + +* Backported in [#63704](https://github.com/ClickHouse/ClickHouse/issues/63704):. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Installation test has wrong check_state'. [#63994](https://github.com/ClickHouse/ClickHouse/pull/63994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#63343](https://github.com/ClickHouse/ClickHouse/issues/63343): The commit URL has a different pattern. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Backported in [#63965](https://github.com/ClickHouse/ClickHouse/issues/63965): fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)). +* Backported in [#64043](https://github.com/ClickHouse/ClickHouse/issues/64043): Do not create new release in release branch automatically. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Pin requests to fix the integration tests. [#65183](https://github.com/ClickHouse/ClickHouse/pull/65183) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v24.1.6.52-stable.md b/docs/changelogs/v24.1.6.52-stable.md new file mode 100644 index 00000000000..341561e9a64 --- /dev/null +++ b/docs/changelogs/v24.1.6.52-stable.md @@ -0,0 +1,45 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.1.6.52-stable (fa09f677bc9) FIXME as compared to v24.1.5.6-stable (7f67181ff31) + +#### Improvement +* Backported in [#60292](https://github.com/ClickHouse/ClickHouse/issues/60292): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#60832](https://github.com/ClickHouse/ClickHouse/issues/60832): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Backported in [#60413](https://github.com/ClickHouse/ClickHouse/issues/60413): Fix segmentation fault in KQL parser when the input query exceeds the `max_query_size`. Also re-enable the KQL dialect. 
Fixes [#59036](https://github.com/ClickHouse/ClickHouse/issues/59036) and [#59037](https://github.com/ClickHouse/ClickHouse/issues/59037). [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)). +* Backported in [#60074](https://github.com/ClickHouse/ClickHouse/issues/60074): Fix error `Read beyond last offset` for `AsynchronousBoundedReadBuffer`. [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#60299](https://github.com/ClickHouse/ClickHouse/issues/60299): Fix having neither acked nor nacked messages. If an exception happens during the read-write phase, messages will be nacked. [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#60066](https://github.com/ClickHouse/ClickHouse/issues/60066): Fix optimize_uniq_to_count removing the column alias. [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#60638](https://github.com/ClickHouse/ClickHouse/issues/60638): Fixed a bug in parallel optimization for queries with `FINAL`, which could give an incorrect result in rare cases. [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#60177](https://github.com/ClickHouse/ClickHouse/issues/60177): Fix cosineDistance crash with Nullable. [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#60279](https://github.com/ClickHouse/ClickHouse/issues/60279): Hide sensitive info for `S3Queue` table engine. [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#61000](https://github.com/ClickHouse/ClickHouse/issues/61000): Reduce the number of read rows from `system.numbers`.
Fixes [#59418](https://github.com/ClickHouse/ClickHouse/issues/59418). [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)). +* Backported in [#60791](https://github.com/ClickHouse/ClickHouse/issues/60791): Fix buffer overflow that can happen if the attacker asks the HTTP server to decompress data with a composition of codecs and size triggering numeric overflow. Fix buffer overflow that can happen inside codec NONE on wrong input data. This was submitted by TIANGONG research team through our [Bug Bounty program](https://github.com/ClickHouse/ClickHouse/issues/38986). [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#60783](https://github.com/ClickHouse/ClickHouse/issues/60783): Functions for SQL/JSON were able to read uninitialized memory. This closes [#60017](https://github.com/ClickHouse/ClickHouse/issues/60017). Found by Fuzzer. [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#60803](https://github.com/ClickHouse/ClickHouse/issues/60803): Do not set aws custom metadata `x-amz-meta-*` headers on UploadPart & CompleteMultipartUpload calls. [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Backported in [#60820](https://github.com/ClickHouse/ClickHouse/issues/60820): Fix crash in arrayEnumerateRanked. [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#60841](https://github.com/ClickHouse/ClickHouse/issues/60841): Fix crash when using input() in INSERT SELECT JOIN. Closes [#60035](https://github.com/ClickHouse/ClickHouse/issues/60035). [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Backported in [#60904](https://github.com/ClickHouse/ClickHouse/issues/60904): Avoid segfault if too many keys are skipped when reading from S3. [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). + +#### NO CL CATEGORY + +* Backported in [#60186](https://github.com/ClickHouse/ClickHouse/issues/60186):. [#60181](https://github.com/ClickHouse/ClickHouse/pull/60181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#60333](https://github.com/ClickHouse/ClickHouse/issues/60333): CI: Fix job failures due to jepsen artifacts. [#59890](https://github.com/ClickHouse/ClickHouse/pull/59890) ([Max K.](https://github.com/maxknv)). +* Backported in [#60034](https://github.com/ClickHouse/ClickHouse/issues/60034): Fix mark release ready. [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#60326](https://github.com/ClickHouse/ClickHouse/issues/60326): Ability to detect undead ZooKeeper sessions. [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#60363](https://github.com/ClickHouse/ClickHouse/issues/60363): CI: hot fix for gh statuses. [#60201](https://github.com/ClickHouse/ClickHouse/pull/60201) ([Max K.](https://github.com/maxknv)). +* Backported in [#60648](https://github.com/ClickHouse/ClickHouse/issues/60648): Detect io_uring in tests. [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#60569](https://github.com/ClickHouse/ClickHouse/issues/60569): Remove broken test while we fix it. [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#60756](https://github.com/ClickHouse/ClickHouse/issues/60756): Update shellcheck. 
[#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#60584](https://github.com/ClickHouse/ClickHouse/issues/60584): CI: fix docker build job name. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)). + diff --git a/docs/changelogs/v24.3.4.147-lts.md b/docs/changelogs/v24.3.4.147-lts.md new file mode 100644 index 00000000000..7d77fb29977 --- /dev/null +++ b/docs/changelogs/v24.3.4.147-lts.md @@ -0,0 +1,100 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.3.4.147-lts (31a7bdc346d) FIXME as compared to v24.3.3.102-lts (7e7f3bdd9be) + +#### Improvement +* Backported in [#63465](https://github.com/ClickHouse/ClickHouse/issues/63465): Make rabbitmq nack broken messages. Closes [#45350](https://github.com/ClickHouse/ClickHouse/issues/45350). [#60312](https://github.com/ClickHouse/ClickHouse/pull/60312) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#64290](https://github.com/ClickHouse/ClickHouse/issues/64290): Fix logical-error when undoing quorum insert transaction. [#61953](https://github.com/ClickHouse/ClickHouse/pull/61953) ([Han Fei](https://github.com/hanfei1991)). + +#### Build/Testing/Packaging Improvement +* Backported in [#63610](https://github.com/ClickHouse/ClickHouse/issues/63610): The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#65128](https://github.com/ClickHouse/ClickHouse/issues/65128): Decrease the `unit-test` image a few times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Backported in [#64277](https://github.com/ClickHouse/ClickHouse/issues/64277): Fix queries with FINAL give wrong result when table does not use adaptive granularity. [#62432](https://github.com/ClickHouse/ClickHouse/pull/62432) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#63716](https://github.com/ClickHouse/ClickHouse/issues/63716): Fix excessive memory usage for queries with nested lambdas. Fixes [#62036](https://github.com/ClickHouse/ClickHouse/issues/62036). [#62462](https://github.com/ClickHouse/ClickHouse/pull/62462) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#63247](https://github.com/ClickHouse/ClickHouse/issues/63247): Fix size checks when updating materialized nested columns ( fixes [#62731](https://github.com/ClickHouse/ClickHouse/issues/62731) ). [#62773](https://github.com/ClickHouse/ClickHouse/pull/62773) ([Eliot Hautefeuille](https://github.com/hileef)). +* Backported in [#62984](https://github.com/ClickHouse/ClickHouse/issues/62984): Fix the `Unexpected return type` error for queries that read from `StorageBuffer` with `PREWHERE` when the source table has different types. Fixes [#62545](https://github.com/ClickHouse/ClickHouse/issues/62545). [#62916](https://github.com/ClickHouse/ClickHouse/pull/62916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#63185](https://github.com/ClickHouse/ClickHouse/issues/63185): Sanity check: Clamp values instead of throwing. [#63119](https://github.com/ClickHouse/ClickHouse/pull/63119) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#63293](https://github.com/ClickHouse/ClickHouse/issues/63293): Fix crash with untuple and unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)). 
+* Backported in [#63411](https://github.com/ClickHouse/ClickHouse/issues/63411): Fix a misbehavior when SQL security defaults don't load for old tables during server startup. [#63209](https://github.com/ClickHouse/ClickHouse/pull/63209) ([pufit](https://github.com/pufit)). +* Backported in [#63616](https://github.com/ClickHouse/ClickHouse/issues/63616): Fix bug which could potentially lead to rare LOGICAL_ERROR during SELECT query with message: `Unexpected return type from materialize. Expected type_XXX. Got type_YYY.` Introduced in [#59379](https://github.com/ClickHouse/ClickHouse/issues/59379). [#63353](https://github.com/ClickHouse/ClickHouse/pull/63353) ([alesapin](https://github.com/alesapin)). +* Backported in [#63455](https://github.com/ClickHouse/ClickHouse/issues/63455): Fix `X-ClickHouse-Timezone` header returning wrong timezone when using `session_timezone` as query level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)). +* Backported in [#63603](https://github.com/ClickHouse/ClickHouse/issues/63603): Fix backup of projection part in case projection was removed from table metadata, but part still has projection. [#63426](https://github.com/ClickHouse/ClickHouse/pull/63426) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#63508](https://github.com/ClickHouse/ClickHouse/issues/63508): Fix 'Every derived table must have its own alias' error for MYSQL dictionary source, close [#63341](https://github.com/ClickHouse/ClickHouse/issues/63341). [#63481](https://github.com/ClickHouse/ClickHouse/pull/63481) ([vdimir](https://github.com/vdimir)). +* Backported in [#63595](https://github.com/ClickHouse/ClickHouse/issues/63595): Avoid segfault in `MergeTreePrefetchedReadPool` while fetching projection parts. [#63513](https://github.com/ClickHouse/ClickHouse/pull/63513) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#63748](https://github.com/ClickHouse/ClickHouse/issues/63748): Read only the necessary columns from VIEW (new analyzer). Closes [#62594](https://github.com/ClickHouse/ClickHouse/issues/62594). [#63688](https://github.com/ClickHouse/ClickHouse/pull/63688) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#63770](https://github.com/ClickHouse/ClickHouse/issues/63770): Fix [#63539](https://github.com/ClickHouse/ClickHouse/issues/63539). Forbid WINDOW redefinition in new analyzer. [#63694](https://github.com/ClickHouse/ClickHouse/pull/63694) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#64189](https://github.com/ClickHouse/ClickHouse/issues/64189): Fix `Not found column` and `CAST AS Map from array requires nested tuple of 2 elements` exceptions for distributed queries which use `Map(Nothing, Nothing)` type. Fixes [#63637](https://github.com/ClickHouse/ClickHouse/issues/63637). [#63753](https://github.com/ClickHouse/ClickHouse/pull/63753) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#63845](https://github.com/ClickHouse/ClickHouse/issues/63845): Fix possible `ILLEGAL_COLUMN` error in `partial_merge` join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#63755](https://github.com/ClickHouse/ClickHouse/pull/63755) ([vdimir](https://github.com/vdimir)). +* Backported in [#63906](https://github.com/ClickHouse/ClickHouse/issues/63906): `query_plan_remove_redundant_distinct` can break queries with WINDOW FUNCTIONS (with `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)). +* Backported in [#63989](https://github.com/ClickHouse/ClickHouse/issues/63989): Fix incorrect select query result when parallel replicas were used to read from a Materialized View. 
[#63861](https://github.com/ClickHouse/ClickHouse/pull/63861) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#64031](https://github.com/ClickHouse/ClickHouse/issues/64031): Fix an error `Database name is empty` for remote queries with lambdas over the cluster with modified default database. Fixes [#63471](https://github.com/ClickHouse/ClickHouse/issues/63471). [#63864](https://github.com/ClickHouse/ClickHouse/pull/63864) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64559](https://github.com/ClickHouse/ClickHouse/issues/64559): Fix SIGSEGV due to CPU/Real (`query_profiler_real_time_period_ns`/`query_profiler_cpu_time_period_ns`) profiler (has been an issue since 2022, that leads to periodic server crashes, especially if you were using distributed engine). [#63865](https://github.com/ClickHouse/ClickHouse/pull/63865) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#64009](https://github.com/ClickHouse/ClickHouse/issues/64009): Fix analyzer - IN function with arbitrary deep sub-selects in materialized view to use insertion block. [#63930](https://github.com/ClickHouse/ClickHouse/pull/63930) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#64236](https://github.com/ClickHouse/ClickHouse/issues/64236): Fix resolve of unqualified COLUMNS matcher. Preserve the input columns order and forbid usage of unknown identifiers. [#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#64106](https://github.com/ClickHouse/ClickHouse/issues/64106): Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#64168](https://github.com/ClickHouse/ClickHouse/issues/64168): Add missing settings to recoverLostReplica.
[#64040](https://github.com/ClickHouse/ClickHouse/pull/64040) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#64320](https://github.com/ClickHouse/ClickHouse/issues/64320): This fix will use a proper redefined context with the correct definer for each individual view in the query pipeline. Closes [#63777](https://github.com/ClickHouse/ClickHouse/issues/63777). [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)). +* Backported in [#64380](https://github.com/ClickHouse/ClickHouse/issues/64380): Fix analyzer: "Not found column" error is fixed when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#64567](https://github.com/ClickHouse/ClickHouse/issues/64567): Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#64270](https://github.com/ClickHouse/ClickHouse/issues/64270): Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#64339](https://github.com/ClickHouse/ClickHouse/issues/64339): The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#64259](https://github.com/ClickHouse/ClickHouse/issues/64259): Ignore `text_log` config when using Keeper. [#64218](https://github.com/ClickHouse/ClickHouse/pull/64218) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#64688](https://github.com/ClickHouse/ClickHouse/issues/64688): Fix Query Tree size validation. Closes [#63701](https://github.com/ClickHouse/ClickHouse/issues/63701). [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#64725](https://github.com/ClickHouse/ClickHouse/issues/64725): Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#64621](https://github.com/ClickHouse/ClickHouse/issues/64621): Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64678](https://github.com/ClickHouse/ClickHouse/issues/64678): Fix [#64612](https://github.com/ClickHouse/ClickHouse/issues/64612). Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#64831](https://github.com/ClickHouse/ClickHouse/issues/64831): Fix bug which could lead to non-working TTLs with expressions. Fixes [#63700](https://github.com/ClickHouse/ClickHouse/issues/63700). [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)). +* Backported in [#64940](https://github.com/ClickHouse/ClickHouse/issues/64940): Fix OrderByLimitByDuplicateEliminationVisitor across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#64869](https://github.com/ClickHouse/ClickHouse/issues/64869): Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via http protocol, asynchronous inserts.
[#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#64980](https://github.com/ClickHouse/ClickHouse/issues/64980): Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from the materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64972](https://github.com/ClickHouse/ClickHouse/issues/64972): Fix rare crash when table has TTL with subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)). +* Backported in [#65070](https://github.com/ClickHouse/ClickHouse/issues/65070): Fix `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#65175](https://github.com/ClickHouse/ClickHouse/issues/65175): Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) + +* Backported in [#64587](https://github.com/ClickHouse/ClickHouse/issues/64587): Disabled `enable_vertical_final` setting by default. This feature should not be used because it has a bug: [#64543](https://github.com/ClickHouse/ClickHouse/issues/64543). 
[#64544](https://github.com/ClickHouse/ClickHouse/pull/64544) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#64878](https://github.com/ClickHouse/ClickHouse/issues/64878): This PR fixes an error when a user in a specific situation can escalate their privileges on the default database without necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)). + +#### NO CL CATEGORY + +* Backported in [#63304](https://github.com/ClickHouse/ClickHouse/issues/63304):. [#63297](https://github.com/ClickHouse/ClickHouse/pull/63297) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#63708](https://github.com/ClickHouse/ClickHouse/issues/63708):. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#64363](https://github.com/ClickHouse/ClickHouse/issues/64363) to 24.3: Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts"'. [#64907](https://github.com/ClickHouse/ClickHouse/pull/64907) ([Raúl Marín](https://github.com/Algunenano)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#63751](https://github.com/ClickHouse/ClickHouse/issues/63751): group_by_use_nulls strikes back. [#62922](https://github.com/ClickHouse/ClickHouse/pull/62922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#63558](https://github.com/ClickHouse/ClickHouse/issues/63558): Try fix segfault in `MergeTreeReadPoolBase::createTask`. [#63323](https://github.com/ClickHouse/ClickHouse/pull/63323) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#63336](https://github.com/ClickHouse/ClickHouse/issues/63336): The commit url has different pattern. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Backported in [#63374](https://github.com/ClickHouse/ClickHouse/issues/63374): Add tags for the test 03000_traverse_shadow_system_data_paths.sql to make it stable. [#63366](https://github.com/ClickHouse/ClickHouse/pull/63366) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#63625](https://github.com/ClickHouse/ClickHouse/issues/63625): Workaround for `oklch()` inside canvas bug for firefox. [#63404](https://github.com/ClickHouse/ClickHouse/pull/63404) ([Sergei Trifonov](https://github.com/serxa)). +* Backported in [#63569](https://github.com/ClickHouse/ClickHouse/issues/63569): Add `jwcrypto` to integration tests runner. [#63551](https://github.com/ClickHouse/ClickHouse/pull/63551) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Backported in [#63649](https://github.com/ClickHouse/ClickHouse/issues/63649): Fix `02362_part_log_merge_algorithm` flaky test. [#63635](https://github.com/ClickHouse/ClickHouse/pull/63635) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Backported in [#63762](https://github.com/ClickHouse/ClickHouse/issues/63762): Cancel S3 reads properly when parallel reads are used. [#63687](https://github.com/ClickHouse/ClickHouse/pull/63687) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#63741](https://github.com/ClickHouse/ClickHouse/issues/63741): Userspace page cache: don't collect stats if cache is unused. [#63730](https://github.com/ClickHouse/ClickHouse/pull/63730) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#63826](https://github.com/ClickHouse/ClickHouse/issues/63826): Fix `test_odbc_interaction` for arm64 on linux. [#63787](https://github.com/ClickHouse/ClickHouse/pull/63787) ([alesapin](https://github.com/alesapin)). +* Backported in [#63895](https://github.com/ClickHouse/ClickHouse/issues/63895): Fix `test_catboost_evaluate` for aarch64. [#63789](https://github.com/ClickHouse/ClickHouse/pull/63789) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#63887](https://github.com/ClickHouse/ClickHouse/issues/63887): Fix `test_disk_types` for aarch64. [#63832](https://github.com/ClickHouse/ClickHouse/pull/63832) ([alesapin](https://github.com/alesapin)). +* Backported in [#63879](https://github.com/ClickHouse/ClickHouse/issues/63879): Fix `test_short_strings_aggregation` for arm. [#63836](https://github.com/ClickHouse/ClickHouse/pull/63836) ([alesapin](https://github.com/alesapin)). +* Backported in [#63916](https://github.com/ClickHouse/ClickHouse/issues/63916): Disable `test_non_default_compression/test.py::test_preconfigured_deflateqpl_codec` on arm. [#63839](https://github.com/ClickHouse/ClickHouse/pull/63839) ([alesapin](https://github.com/alesapin)). +* Backported in [#63969](https://github.com/ClickHouse/ClickHouse/issues/63969): fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)). +* Backported in [#64047](https://github.com/ClickHouse/ClickHouse/issues/64047): Do not create new release in release branch automatically. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#64076](https://github.com/ClickHouse/ClickHouse/issues/64076): Files without shebang have mime 'text/plain' or 'inode/x-empty'. [#64062](https://github.com/ClickHouse/ClickHouse/pull/64062) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#64142](https://github.com/ClickHouse/ClickHouse/issues/64142): Fix sanitizers. [#64090](https://github.com/ClickHouse/ClickHouse/pull/64090) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#64159](https://github.com/ClickHouse/ClickHouse/issues/64159): Add retries in `git submodule update`. [#64125](https://github.com/ClickHouse/ClickHouse/pull/64125) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Backported in [#64473](https://github.com/ClickHouse/ClickHouse/issues/64473): Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts. [#64363](https://github.com/ClickHouse/ClickHouse/pull/64363) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#65113](https://github.com/ClickHouse/ClickHouse/issues/65113): Adjust the `version_helper` and script to a new release scheme. [#64759](https://github.com/ClickHouse/ClickHouse/pull/64759) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#64999](https://github.com/ClickHouse/ClickHouse/issues/64999): Fix crash with DISTINCT and window functions. [#64767](https://github.com/ClickHouse/ClickHouse/pull/64767) ([Igor Nikonov](https://github.com/devcrafter)). + diff --git a/docs/changelogs/v24.4.2.141-stable.md b/docs/changelogs/v24.4.2.141-stable.md new file mode 100644 index 00000000000..656d0854392 --- /dev/null +++ b/docs/changelogs/v24.4.2.141-stable.md @@ -0,0 +1,101 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.4.2.141-stable (9e23d27bd11) FIXME as compared to v24.4.1.2088-stable (6d4b31322d1) + +#### Improvement +* Backported in [#63467](https://github.com/ClickHouse/ClickHouse/issues/63467): Make rabbitmq nack broken messages. Closes [#45350](https://github.com/ClickHouse/ClickHouse/issues/45350). [#60312](https://github.com/ClickHouse/ClickHouse/pull/60312) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Build/Testing/Packaging Improvement +* Backported in [#63612](https://github.com/ClickHouse/ClickHouse/issues/63612): The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Backported in [#64279](https://github.com/ClickHouse/ClickHouse/issues/64279): Fix queries with FINAL give wrong result when table does not use adaptive granularity. [#62432](https://github.com/ClickHouse/ClickHouse/pull/62432) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#63295](https://github.com/ClickHouse/ClickHouse/issues/63295): Fix crash with untuple and unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#63978](https://github.com/ClickHouse/ClickHouse/issues/63978): Fix intersect parts when restart after drop range. [#63202](https://github.com/ClickHouse/ClickHouse/pull/63202) ([Han Fei](https://github.com/hanfei1991)). +* Backported in [#63413](https://github.com/ClickHouse/ClickHouse/issues/63413): Fix a misbehavior when SQL security defaults don't load for old tables during server startup. [#63209](https://github.com/ClickHouse/ClickHouse/pull/63209) ([pufit](https://github.com/pufit)). +* Backported in [#63388](https://github.com/ClickHouse/ClickHouse/issues/63388): JOIN filter push down filled join fix. Closes [#63228](https://github.com/ClickHouse/ClickHouse/issues/63228). [#63234](https://github.com/ClickHouse/ClickHouse/pull/63234) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#63618](https://github.com/ClickHouse/ClickHouse/issues/63618): Fix bug which could potentially lead to rare LOGICAL_ERROR during SELECT query with message: `Unexpected return type from materialize. Expected type_XXX. Got type_YYY.` Introduced in [#59379](https://github.com/ClickHouse/ClickHouse/issues/59379). [#63353](https://github.com/ClickHouse/ClickHouse/pull/63353) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#63451](https://github.com/ClickHouse/ClickHouse/issues/63451): Fix `X-ClickHouse-Timezone` header returning wrong timezone when using `session_timezone` as query level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)). +* Backported in [#63605](https://github.com/ClickHouse/ClickHouse/issues/63605): Fix backup of projection part in case projection was removed from table metadata, but part still has projection. [#63426](https://github.com/ClickHouse/ClickHouse/pull/63426) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#63510](https://github.com/ClickHouse/ClickHouse/issues/63510): Fix 'Every derived table must have its own alias' error for MYSQL dictionary source, close [#63341](https://github.com/ClickHouse/ClickHouse/issues/63341). [#63481](https://github.com/ClickHouse/ClickHouse/pull/63481) ([vdimir](https://github.com/vdimir)). +* Backported in [#63592](https://github.com/ClickHouse/ClickHouse/issues/63592): Avoid segfault in `MergeTreePrefetchedReadPool` while fetching projection parts. [#63513](https://github.com/ClickHouse/ClickHouse/pull/63513) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#63750](https://github.com/ClickHouse/ClickHouse/issues/63750): Read only the necessary columns from VIEW (new analyzer). Closes [#62594](https://github.com/ClickHouse/ClickHouse/issues/62594). [#63688](https://github.com/ClickHouse/ClickHouse/pull/63688) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#63772](https://github.com/ClickHouse/ClickHouse/issues/63772): Fix [#63539](https://github.com/ClickHouse/ClickHouse/issues/63539). Forbid WINDOW redefinition in new analyzer. [#63694](https://github.com/ClickHouse/ClickHouse/pull/63694) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#63872](https://github.com/ClickHouse/ClickHouse/issues/63872): Flatten_nested is broken with replicated database.
[#63695](https://github.com/ClickHouse/ClickHouse/pull/63695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#63854](https://github.com/ClickHouse/ClickHouse/issues/63854): Fix `Not found column` and `CAST AS Map from array requires nested tuple of 2 elements` exceptions for distributed queries which use `Map(Nothing, Nothing)` type. Fixes [#63637](https://github.com/ClickHouse/ClickHouse/issues/63637). [#63753](https://github.com/ClickHouse/ClickHouse/pull/63753) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#63847](https://github.com/ClickHouse/ClickHouse/issues/63847): Fix possible `ILLEGAL_COLUMN` error in `partial_merge` join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#63755](https://github.com/ClickHouse/ClickHouse/pull/63755) ([vdimir](https://github.com/vdimir)). +* Backported in [#63908](https://github.com/ClickHouse/ClickHouse/issues/63908): `query_plan_remove_redundant_distinct` can break queries with WINDOW FUNCTIONS (with `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)). +* Backported in [#63955](https://github.com/ClickHouse/ClickHouse/issues/63955): Fix possible crash with SYSTEM UNLOAD PRIMARY KEY. [#63778](https://github.com/ClickHouse/ClickHouse/pull/63778) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#63938](https://github.com/ClickHouse/ClickHouse/issues/63938): Allow JOIN filter push down to both streams if only single equivalent column is used in query. Closes [#63799](https://github.com/ClickHouse/ClickHouse/issues/63799). [#63819](https://github.com/ClickHouse/ClickHouse/pull/63819) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Backported in [#63991](https://github.com/ClickHouse/ClickHouse/issues/63991): Fix incorrect select query result when parallel replicas were used to read from a Materialized View. [#63861](https://github.com/ClickHouse/ClickHouse/pull/63861) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#64033](https://github.com/ClickHouse/ClickHouse/issues/64033): Fix an error `Database name is empty` for remote queries with lambdas over the cluster with modified default database. Fixes [#63471](https://github.com/ClickHouse/ClickHouse/issues/63471). [#63864](https://github.com/ClickHouse/ClickHouse/pull/63864) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64561](https://github.com/ClickHouse/ClickHouse/issues/64561): Fix SIGSEGV due to CPU/Real (`query_profiler_real_time_period_ns`/`query_profiler_cpu_time_period_ns`) profiler (has been an issue since 2022, that leads to periodic server crashes, especially if you were using distributed engine). [#63865](https://github.com/ClickHouse/ClickHouse/pull/63865) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#64011](https://github.com/ClickHouse/ClickHouse/issues/64011): Fix analyzer - IN function with arbitrary deep sub-selects in materialized view to use insertion block. [#63930](https://github.com/ClickHouse/ClickHouse/pull/63930) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#64238](https://github.com/ClickHouse/ClickHouse/issues/64238): Fix resolve of unqualified COLUMNS matcher. Preserve the input columns order and forbid usage of unknown identifiers. [#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#64103](https://github.com/ClickHouse/ClickHouse/issues/64103): Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)).
+* Backported in [#64170](https://github.com/ClickHouse/ClickHouse/issues/64170): Add missing settings to recoverLostReplica. [#64040](https://github.com/ClickHouse/ClickHouse/pull/64040) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#64322](https://github.com/ClickHouse/ClickHouse/issues/64322): This fix will use a proper redefined context with the correct definer for each individual view in the query pipeline. Closes [#63777](https://github.com/ClickHouse/ClickHouse/issues/63777). [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)). +* Backported in [#64382](https://github.com/ClickHouse/ClickHouse/issues/64382): Fix analyzer: "Not found column" error is fixed when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#64568](https://github.com/ClickHouse/ClickHouse/issues/64568): Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#64272](https://github.com/ClickHouse/ClickHouse/issues/64272): Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#64330](https://github.com/ClickHouse/ClickHouse/issues/64330): The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#64254](https://github.com/ClickHouse/ClickHouse/issues/64254): Ignore `text_log` config when using Keeper.
[#64218](https://github.com/ClickHouse/ClickHouse/pull/64218) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#64690](https://github.com/ClickHouse/ClickHouse/issues/64690): Fix Query Tree size validation. Closes [#63701](https://github.com/ClickHouse/ClickHouse/issues/63701). [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#64409](https://github.com/ClickHouse/ClickHouse/issues/64409): Fix `Logical error: Bad cast` for `Buffer` table with `PREWHERE`. Fixes [#64172](https://github.com/ClickHouse/ClickHouse/issues/64172). [#64388](https://github.com/ClickHouse/ClickHouse/pull/64388) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64727](https://github.com/ClickHouse/ClickHouse/issues/64727): Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#64623](https://github.com/ClickHouse/ClickHouse/issues/64623): Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64680](https://github.com/ClickHouse/ClickHouse/issues/64680): Fix [#64612](https://github.com/ClickHouse/ClickHouse/issues/64612). Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#64942](https://github.com/ClickHouse/ClickHouse/issues/64942): Fix OrderByLimitByDuplicateEliminationVisitor across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)). 
+* Backported in [#64871](https://github.com/ClickHouse/ClickHouse/issues/64871): Fixed memory possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via http protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)). + +#### CI Fix or Improvement (changelog entry is not required) + +* Backported in [#63364](https://github.com/ClickHouse/ClickHouse/issues/63364): Implement cumulative A Sync status. [#61464](https://github.com/ClickHouse/ClickHouse/pull/61464) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#63338](https://github.com/ClickHouse/ClickHouse/issues/63338): Use `/commit/` to have the URLs in [reports](https://play.clickhouse.com/play?user=play#c2VsZWN0IGRpc3RpbmN0IGNvbW1pdF91cmwgZnJvbSBjaGVja3Mgd2hlcmUgY2hlY2tfc3RhcnRfdGltZSA+PSBub3coKSAtIGludGVydmFsIDEgbW9udGggYW5kIHB1bGxfcmVxdWVzdF9udW1iZXI9NjA1MzI=) like https://github.com/ClickHouse/ClickHouse/commit/44f8bc5308b53797bec8cccc3bd29fab8a00235d and not like https://github.com/ClickHouse/ClickHouse/commits/44f8bc5308b53797bec8cccc3bd29fab8a00235d. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#63376](https://github.com/ClickHouse/ClickHouse/issues/63376):. [#63366](https://github.com/ClickHouse/ClickHouse/pull/63366) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#63571](https://github.com/ClickHouse/ClickHouse/issues/63571):. [#63551](https://github.com/ClickHouse/ClickHouse/pull/63551) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Backported in [#63651](https://github.com/ClickHouse/ClickHouse/issues/63651): Fix 02362_part_log_merge_algorithm flaky test. [#63635](https://github.com/ClickHouse/ClickHouse/pull/63635) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). 
+* Backported in [#63828](https://github.com/ClickHouse/ClickHouse/issues/63828): Fix test_odbc_interaction from aarch64 [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63787](https://github.com/ClickHouse/ClickHouse/pull/63787) ([alesapin](https://github.com/alesapin)). +* Backported in [#63897](https://github.com/ClickHouse/ClickHouse/issues/63897): Fix test `test_catboost_evaluate` for aarch64. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63789](https://github.com/ClickHouse/ClickHouse/pull/63789) ([alesapin](https://github.com/alesapin)). +* Backported in [#63889](https://github.com/ClickHouse/ClickHouse/issues/63889): Remove HDFS from disks config for one integration test for arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63832](https://github.com/ClickHouse/ClickHouse/pull/63832) ([alesapin](https://github.com/alesapin)). +* Backported in [#63881](https://github.com/ClickHouse/ClickHouse/issues/63881): Bump version for old image in test_short_strings_aggregation to make it work on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63836](https://github.com/ClickHouse/ClickHouse/pull/63836) ([alesapin](https://github.com/alesapin)). +* Backported in [#63919](https://github.com/ClickHouse/ClickHouse/issues/63919): Disable test `test_non_default_compression/test.py::test_preconfigured_deflateqpl_codec` on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63839](https://github.com/ClickHouse/ClickHouse/pull/63839) ([alesapin](https://github.com/alesapin)). +* Backported in [#63971](https://github.com/ClickHouse/ClickHouse/issues/63971): Fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)). +* Backported in [#64049](https://github.com/ClickHouse/ClickHouse/issues/64049): Add `ClickHouseVersion.copy` method. 
Create a branch release in advance without spinning out the release to increase the stability. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#64078](https://github.com/ClickHouse/ClickHouse/issues/64078): The mime type is not 100% reliable for Python and shell scripts without shebangs; add a check for file extension. [#64062](https://github.com/ClickHouse/ClickHouse/pull/64062) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#64161](https://github.com/ClickHouse/ClickHouse/issues/64161): Add retries in git submodule update. [#64125](https://github.com/ClickHouse/ClickHouse/pull/64125) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) + +* Backported in [#64589](https://github.com/ClickHouse/ClickHouse/issues/64589): Disabled `enable_vertical_final` setting by default. This feature should not be used because it has a bug: [#64543](https://github.com/ClickHouse/ClickHouse/issues/64543). [#64544](https://github.com/ClickHouse/ClickHouse/pull/64544) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#64880](https://github.com/ClickHouse/ClickHouse/issues/64880): This PR fixes an error when a user in a specific situation can escalate their privileges on the default database without necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)). + +#### NO CL CATEGORY + +* Backported in [#63306](https://github.com/ClickHouse/ClickHouse/issues/63306):. [#63297](https://github.com/ClickHouse/ClickHouse/pull/63297) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#63710](https://github.com/ClickHouse/ClickHouse/issues/63710):. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+ +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#64363](https://github.com/ClickHouse/ClickHouse/issues/64363) to 24.4: Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts"'. [#64905](https://github.com/ClickHouse/ClickHouse/pull/64905) ([Raúl Marín](https://github.com/Algunenano)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* group_by_use_nulls strikes back [#62922](https://github.com/ClickHouse/ClickHouse/pull/62922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add `FROM` keyword to `TRUNCATE ALL TABLES` [#63241](https://github.com/ClickHouse/ClickHouse/pull/63241) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* More checks for concurrently deleted files and dirs in system.remote_data_paths [#63274](https://github.com/ClickHouse/ClickHouse/pull/63274) ([Alexander Gololobov](https://github.com/davenger)). +* Try fix segfault in `MergeTreeReadPoolBase::createTask` [#63323](https://github.com/ClickHouse/ClickHouse/pull/63323) ([Antonio Andelic](https://github.com/antonio2368)). +* Skip unaccessible table dirs in system.remote_data_paths [#63330](https://github.com/ClickHouse/ClickHouse/pull/63330) ([Alexander Gololobov](https://github.com/davenger)). +* Workaround for `oklch()` inside canvas bug for firefox [#63404](https://github.com/ClickHouse/ClickHouse/pull/63404) ([Sergei Trifonov](https://github.com/serxa)). +* Cancel S3 reads properly when parallel reads are used [#63687](https://github.com/ClickHouse/ClickHouse/pull/63687) ([Antonio Andelic](https://github.com/antonio2368)). +* Userspace page cache: don't collect stats if cache is unused [#63730](https://github.com/ClickHouse/ClickHouse/pull/63730) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix sanitizers [#64090](https://github.com/ClickHouse/ClickHouse/pull/64090) ([Azat Khuzhin](https://github.com/azat)). 
+* Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts [#64363](https://github.com/ClickHouse/ClickHouse/pull/64363) ([Kruglov Pavel](https://github.com/Avogar)). +* CI: Critical bugfix category in PR template [#64480](https://github.com/ClickHouse/ClickHouse/pull/64480) ([Max K.](https://github.com/maxknv)). + diff --git a/docs/changelogs/v24.4.3.25-stable.md b/docs/changelogs/v24.4.3.25-stable.md new file mode 100644 index 00000000000..9582753c731 --- /dev/null +++ b/docs/changelogs/v24.4.3.25-stable.md @@ -0,0 +1,30 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.4.3.25-stable (a915dd4eda4) FIXME as compared to v24.4.2.141-stable (9e23d27bd11) + +#### Build/Testing/Packaging Improvement +* Backported in [#65130](https://github.com/ClickHouse/ClickHouse/issues/65130): Decrease the `unit-test` image a few times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Backported in [#64982](https://github.com/ClickHouse/ClickHouse/issues/64982): Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from the materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64974](https://github.com/ClickHouse/ClickHouse/issues/64974): Fix rare crash when table has TTL with subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#65072](https://github.com/ClickHouse/ClickHouse/issues/65072): Fix `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#65177](https://github.com/ClickHouse/ClickHouse/issues/65177): Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65263](https://github.com/ClickHouse/ClickHouse/issues/65263): Fix the bug in Hashed and Hashed_Array dictionary short circuit evaluation, which may read uninitialized number, leading to various errors. [#65256](https://github.com/ClickHouse/ClickHouse/pull/65256) ([jsc0218](https://github.com/jsc0218)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) + +* Backported in [#65285](https://github.com/ClickHouse/ClickHouse/issues/65285): Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65114](https://github.com/ClickHouse/ClickHouse/issues/65114): Adjust the `version_helper` and script to a new release scheme. [#64759](https://github.com/ClickHouse/ClickHouse/pull/64759) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#65225](https://github.com/ClickHouse/ClickHouse/issues/65225): Capture weak_ptr of ContextAccess for safety. [#65051](https://github.com/ClickHouse/ClickHouse/pull/65051) ([Alexander Gololobov](https://github.com/davenger)). 
+* Backported in [#65217](https://github.com/ClickHouse/ClickHouse/issues/65217): Fix false positives leaky memory warnings in OpenSSL. [#65125](https://github.com/ClickHouse/ClickHouse/pull/65125) ([Robert Schulze](https://github.com/rschu1ze)). + diff --git a/docs/changelogs/v24.5.2.34-stable.md b/docs/changelogs/v24.5.2.34-stable.md new file mode 100644 index 00000000000..2db05a5f5dc --- /dev/null +++ b/docs/changelogs/v24.5.2.34-stable.md @@ -0,0 +1,38 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.5.2.34-stable (45589aeee49) FIXME as compared to v24.5.1.1763-stable (647c154a94d) + +#### Improvement +* Backported in [#65096](https://github.com/ClickHouse/ClickHouse/issues/65096): The setting `allow_experimental_join_condition` was accidentally marked as important which may prevent distributed queries in a mixed versions cluster from being executed successfully. [#65008](https://github.com/ClickHouse/ClickHouse/pull/65008) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#65132](https://github.com/ClickHouse/ClickHouse/issues/65132): Decrease the `unit-test` image a few times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Backported in [#64729](https://github.com/ClickHouse/ClickHouse/issues/64729): Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#65061](https://github.com/ClickHouse/ClickHouse/issues/65061): Fix the `Expression nodes list expected 1 projection names` and `Unknown expression or identifier` errors for queries with aliases to `GLOBAL IN.` Fixes [#64445](https://github.com/ClickHouse/ClickHouse/issues/64445). 
[#64517](https://github.com/ClickHouse/ClickHouse/pull/64517) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088): Fix removing the `WHERE` and `PREWHERE` expressions, which are always true (for the new analyzer). Fixes [#64575](https://github.com/ClickHouse/ClickHouse/issues/64575). [#64695](https://github.com/ClickHouse/ClickHouse/pull/64695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64944](https://github.com/ClickHouse/ClickHouse/issues/64944): Fix OrderByLimitByDuplicateEliminationVisitor across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#64873](https://github.com/ClickHouse/ClickHouse/issues/64873): Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via HTTP protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#64984](https://github.com/ClickHouse/ClickHouse/issues/64984): Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from the materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64976](https://github.com/ClickHouse/ClickHouse/issues/64976): Fix rare crash when table has TTL with subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)).
+* Backported in [#65074](https://github.com/ClickHouse/ClickHouse/issues/65074): Fix `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#65179](https://github.com/ClickHouse/ClickHouse/issues/65179): Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65163](https://github.com/ClickHouse/ClickHouse/issues/65163): Fix pushing arithmetic operations out of aggregation. In the new analyzer, optimization was applied only once. Part of [#62245](https://github.com/ClickHouse/ClickHouse/issues/62245). [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) + +* Backported in [#64882](https://github.com/ClickHouse/ClickHouse/issues/64882): This PR fixes an error when a user in a specific situation can escalate their privileges on the default database without necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65002](https://github.com/ClickHouse/ClickHouse/issues/65002): Be more graceful with existing tables with `inverted` indexes. [#64656](https://github.com/ClickHouse/ClickHouse/pull/64656) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#65115](https://github.com/ClickHouse/ClickHouse/issues/65115): Adjust the `version_helper` and script to a new release scheme. [#64759](https://github.com/ClickHouse/ClickHouse/pull/64759) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* Backported in [#64796](https://github.com/ClickHouse/ClickHouse/issues/64796): Fix crash with DISTINCT and window functions. [#64767](https://github.com/ClickHouse/ClickHouse/pull/64767) ([Igor Nikonov](https://github.com/devcrafter)). + diff --git a/docs/changelogs/v24.5.3.5-stable.md b/docs/changelogs/v24.5.3.5-stable.md new file mode 100644 index 00000000000..4606e58d0a4 --- /dev/null +++ b/docs/changelogs/v24.5.3.5-stable.md @@ -0,0 +1,14 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.5.3.5-stable (e0eb66f8e17) FIXME as compared to v24.5.2.34-stable (45589aeee49) + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65227](https://github.com/ClickHouse/ClickHouse/issues/65227): Capture weak_ptr of ContextAccess for safety. [#65051](https://github.com/ClickHouse/ClickHouse/pull/65051) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#65219](https://github.com/ClickHouse/ClickHouse/issues/65219): Fix false positives leaky memory warnings in OpenSSL. [#65125](https://github.com/ClickHouse/ClickHouse/pull/65125) ([Robert Schulze](https://github.com/rschu1ze)). + diff --git a/docs/changelogs/v24.5.4.49-stable.md b/docs/changelogs/v24.5.4.49-stable.md new file mode 100644 index 00000000000..56d497d5f59 --- /dev/null +++ b/docs/changelogs/v24.5.4.49-stable.md @@ -0,0 +1,41 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.5.4.49-stable (63b760955a0) FIXME as compared to v24.5.3.5-stable (e0eb66f8e17) + +#### Improvement +* Backported in [#65886](https://github.com/ClickHouse/ClickHouse/issues/65886): Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)). 
+* Backported in [#65304](https://github.com/ClickHouse/ClickHouse/issues/65304): Returned back the behaviour of how ClickHouse works and interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#65896](https://github.com/ClickHouse/ClickHouse/issues/65896): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) +* Backported in [#65287](https://github.com/ClickHouse/ClickHouse/issues/65287): Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#65374](https://github.com/ClickHouse/ClickHouse/issues/65374): Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#65437](https://github.com/ClickHouse/ClickHouse/issues/65437): Forbid `QUALIFY` clause in the old analyzer. The old analyzer ignored `QUALIFY`, so it could lead to unexpected data removal in mutations. [#65356](https://github.com/ClickHouse/ClickHouse/pull/65356) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#65450](https://github.com/ClickHouse/ClickHouse/issues/65450): Use correct memory alignment for Distinct combinator. 
Previously, crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#65712](https://github.com/ClickHouse/ClickHouse/issues/65712): Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#65681](https://github.com/ClickHouse/ClickHouse/issues/65681): Fix `duplicate alias` error for distributed queries with `ARRAY JOIN`. [#64226](https://github.com/ClickHouse/ClickHouse/pull/64226) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65331](https://github.com/ClickHouse/ClickHouse/issues/65331): Fix the crash loop when restoring from backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)). +* Backported in [#64835](https://github.com/ClickHouse/ClickHouse/issues/64835): Fix bug which could lead to non-working TTLs with expressions. [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)). +* Backported in [#65542](https://github.com/ClickHouse/ClickHouse/issues/65542): Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)). +* Backported in [#65580](https://github.com/ClickHouse/ClickHouse/issues/65580): Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)). +* Backported in [#65618](https://github.com/ClickHouse/ClickHouse/issues/65618): Fix possible infinite query duration in case of cyclic aliases. 
Fixes [#64849](https://github.com/ClickHouse/ClickHouse/issues/64849). [#65081](https://github.com/ClickHouse/ClickHouse/pull/65081) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65617](https://github.com/ClickHouse/ClickHouse/issues/65617): Fix aggregate function name rewriting in the new analyzer. [#65110](https://github.com/ClickHouse/ClickHouse/pull/65110) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#65732](https://github.com/ClickHouse/ClickHouse/issues/65732): Eliminate injective function in argument of functions `uniq*` recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#65265](https://github.com/ClickHouse/ClickHouse/issues/65265): Fix the bug in Hashed and Hashed_Array dictionary short circuit evaluation, which may read uninitialized number, leading to various errors. [#65256](https://github.com/ClickHouse/ClickHouse/pull/65256) ([jsc0218](https://github.com/jsc0218)). +* Backported in [#65663](https://github.com/ClickHouse/ClickHouse/issues/65663): Disable `non-intersecting-parts` optimization for queries with `FINAL` in case of `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65788](https://github.com/ClickHouse/ClickHouse/issues/65788): Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)). 
+* Backported in [#65812](https://github.com/ClickHouse/ClickHouse/issues/65812): Fix invalid exceptions in function `parseDateTime` with `%F` and `%D` placeholders. [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#65828](https://github.com/ClickHouse/ClickHouse/issues/65828): Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65412](https://github.com/ClickHouse/ClickHouse/issues/65412): Re-enable OpenSSL session caching. [#65111](https://github.com/ClickHouse/ClickHouse/pull/65111) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#65905](https://github.com/ClickHouse/ClickHouse/issues/65905): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)). + diff --git a/docs/changelogs/v24.6.1.4423-stable.md b/docs/changelogs/v24.6.1.4423-stable.md new file mode 100644 index 00000000000..f7af9cbaf8d --- /dev/null +++ b/docs/changelogs/v24.6.1.4423-stable.md @@ -0,0 +1,735 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.6.1.4423-stable (dcced7c8478) FIXME as compared to v24.4.1.2088-stable (6d4b31322d1) + +#### Backward Incompatible Change +* Enable asynchronous load of databases and tables by default. See the `async_load_databases` in config.xml. While this change is fully compatible, it can introduce a difference in behavior. When `async_load_databases` is false, as in the previous versions, the server will not accept connections until all tables are loaded. When `async_load_databases` is true, as in the new version, the server can accept connections before all the tables are loaded. 
If a query is made to a table that is not yet loaded, it will wait for the table's loading, which can take considerable time. It can change the behavior of the server if it is part of a large distributed system under a load balancer. In the first case, the load balancer can get a connection refusal and quickly failover to another server. In the second case, the load balancer can connect to a server that is still loading the tables, and the query will have a higher latency. Moreover, if many queries accumulate in the waiting state, it can lead to a "thundering herd" problem when they start processing simultaneously. This can make a difference only for highly loaded distributed backends. You can set the value of `async_load_databases` to false to avoid this problem. [#57695](https://github.com/ClickHouse/ClickHouse/pull/57695) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Some invalid queries will fail earlier during parsing. Note: disabled the support for inline KQL expressions (the experimental Kusto language) when they are put into a `kql` table function without a string literal, e.g. `kql(garbage | trash)` instead of `kql('garbage | trash')` or `kql($$garbage | trash$$)`. This feature was introduced unintentionally and should not exist. [#61500](https://github.com/ClickHouse/ClickHouse/pull/61500) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Renamed "inverted indexes" to "full-text indexes" which is a less technical / more user-friendly name. This also changes internal table metadata and breaks tables with existing (experimental) inverted indexes. Please make sure to drop such indexes before upgrade and re-create them after upgrade. [#62884](https://github.com/ClickHouse/ClickHouse/pull/62884) ([Robert Schulze](https://github.com/rschu1ze)). +* Usage of functions `neighbor`, `runningAccumulate`, `runningDifferenceStartingWithFirstValue`, `runningDifference` is deprecated (because it is error-prone).
Proper window functions should be used instead. To enable them back, set `allow_deprecated_functions=1`. [#63132](https://github.com/ClickHouse/ClickHouse/pull/63132) ([Nikita Taranov](https://github.com/nickitat)). +* Queries from `system.columns` will work faster if there is a large number of columns, but many databases or tables are not granted for `SHOW TABLES`. Note that in previous versions, if you grant `SHOW COLUMNS` to individual columns without granting `SHOW TABLES` to the corresponding tables, the `system.columns` table will show these columns, but in a new version, it will skip the table entirely. Remove trace log messages "Access granted" and "Access denied" that slowed down queries. [#63439](https://github.com/ClickHouse/ClickHouse/pull/63439) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rework parallel processing in `Ordered` mode of storage `S3Queue`. This PR is backward incompatible for Ordered mode if you used settings `s3queue_processing_threads_num` or `s3queue_total_shards_num`. Setting `s3queue_total_shards_num` is deleted, previously it was allowed to use only under `s3queue_allow_experimental_sharded_mode`, which is now deprecated. A new setting is added - `s3queue_buckets`. [#64349](https://github.com/ClickHouse/ClickHouse/pull/64349) ([Kseniia Sumarokova](https://github.com/kssenii)). +* New functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` were added. Unlike the existing functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake`, the new functions are compatible with function `generateSnowflakeID`, i.e. they accept the snowflake IDs generated by `generateSnowflakeID` and produce snowflake IDs of the same type as `generateSnowflakeID` (i.e. `UInt64`). Furthermore, the new functions default to the UNIX epoch (aka. 1970-01-01), just like `generateSnowflakeID`. If necessary, a different epoch, e.g. 
Twitter's/X's epoch 2010-11-04 aka. 1288834974657 msec since UNIX epoch, can be passed. The old conversion functions are deprecated and will be removed after a transition period: to use them regardless, enable setting `allow_deprecated_snowflake_conversion_functions`. [#64948](https://github.com/ClickHouse/ClickHouse/pull/64948) ([Robert Schulze](https://github.com/rschu1ze)). + +#### New Feature +* Provide support for AzureBlobStorage function in ClickHouse server to use Azure Workload identity to authenticate against Azure blob storage. If `use_workload_identity` parameter is set in config, [workload identity](https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/identity/azure-identity#authenticate-azure-hosted-applications) is used for authentication. [#57881](https://github.com/ClickHouse/ClickHouse/pull/57881) ([Vinay Suryadevara](https://github.com/vinay92-ch)). +* Introduce bulk loading to StorageEmbeddedRocksDB by creating and ingesting SST file instead of relying on rocksdb built-in memtable. This helps to increase importing speed, especially for long-running insert query to StorageEmbeddedRocksDB tables. Also, introduce `StorageEmbeddedRocksDB` table settings. [#59163](https://github.com/ClickHouse/ClickHouse/pull/59163) ([Duc Canh Le](https://github.com/canhld94)). +* Introduce statistics of type "number of distinct values". [#59357](https://github.com/ClickHouse/ClickHouse/pull/59357) ([Han Fei](https://github.com/hanfei1991)). +* User can now parse CRLF with TSV format using a setting `input_format_tsv_crlf_end_of_line`. Closes [#56257](https://github.com/ClickHouse/ClickHouse/issues/56257). [#59747](https://github.com/ClickHouse/ClickHouse/pull/59747) ([Shaun Struwig](https://github.com/Blargian)). +* Add Hilbert Curve encode and decode functions. [#60156](https://github.com/ClickHouse/ClickHouse/pull/60156) ([Artem Mustafin](https://github.com/Artemmm91)).
+* Adds the Form Format to read/write a single record in the application/x-www-form-urlencoded format. [#60199](https://github.com/ClickHouse/ClickHouse/pull/60199) ([Shaun Struwig](https://github.com/Blargian)). +* Added possibility to compress in CROSS JOIN. [#60459](https://github.com/ClickHouse/ClickHouse/pull/60459) ([p1rattttt](https://github.com/p1rattttt)). +* New setting `input_format_force_null_for_omitted_fields` that forces NULL values for omitted fields. [#60887](https://github.com/ClickHouse/ClickHouse/pull/60887) ([Constantine Peresypkin](https://github.com/pkit)). +* Support join with inequal conditions which involve columns from both left and right table. e.g. `t1.y < t2.y`. To enable, `SET allow_experimental_join_condition = 1`. [#60920](https://github.com/ClickHouse/ClickHouse/pull/60920) ([lgbo](https://github.com/lgbo-ustc)). +* Earlier our s3 storage and s3 table function didn't support selecting from archive files. I created a solution that allows to iterate over files inside archives in S3. [#62259](https://github.com/ClickHouse/ClickHouse/pull/62259) ([Daniil Ivanik](https://github.com/divanik)). +* Support for conditional function `clamp`. [#62377](https://github.com/ClickHouse/ClickHouse/pull/62377) ([skyoct](https://github.com/skyoct)). +* Add npy output format. [#62430](https://github.com/ClickHouse/ClickHouse/pull/62430) ([豪肥肥](https://github.com/HowePa)). +* Added support for reading LINESTRING geometry in WKT format using function `readWKTLineString`. [#62519](https://github.com/ClickHouse/ClickHouse/pull/62519) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Added SQL functions `generateUUIDv7`, `generateUUIDv7ThreadMonotonic`, `generateUUIDv7NonMonotonic` (with different monotonicity/performance trade-offs) to generate version 7 UUIDs aka. timestamp-based UUIDs with random component. 
Also added a new function `UUIDToNum` to extract bytes from a UUID and a new function `UUIDv7ToDateTime` to extract timestamp component from a UUID version 7. [#62852](https://github.com/ClickHouse/ClickHouse/pull/62852) ([Alexey Petrunyaka](https://github.com/pet74alex)). +* Implement Dynamic data type that allows to store values of any type inside it without knowing all of them in advance. Dynamic type is available under a setting `allow_experimental_dynamic_type`. [#63058](https://github.com/ClickHouse/ClickHouse/pull/63058) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow to attach parts from a different disk. [#63087](https://github.com/ClickHouse/ClickHouse/pull/63087) ([Unalian](https://github.com/Unalian)). +* Allow proxy to be bypassed for hosts specified in `no_proxy` env variable and ClickHouse proxy configuration. [#63314](https://github.com/ClickHouse/ClickHouse/pull/63314) ([Arthur Passos](https://github.com/arthurpassos)). +* Introduce bulk loading to StorageEmbeddedRocksDB by creating and ingesting SST file instead of relying on rocksdb built-in memtable. This helps to increase importing speed, especially for long-running insert query to StorageEmbeddedRocksDB tables. Also, introduce StorageEmbeddedRocksDB table settings. [#63324](https://github.com/ClickHouse/ClickHouse/pull/63324) ([Duc Canh Le](https://github.com/canhld94)). +* Raw as a synonym for TSVRaw. [#63394](https://github.com/ClickHouse/ClickHouse/pull/63394) ([Unalian](https://github.com/Unalian)). +* Added possibility to do cross join in temporary file if size exceeds limits. [#63432](https://github.com/ClickHouse/ClickHouse/pull/63432) ([p1rattttt](https://github.com/p1rattttt)). +* Added a new table function `loop` to support returning query results in an infinite loop. [#63452](https://github.com/ClickHouse/ClickHouse/pull/63452) ([Sariel](https://github.com/sarielwxm)). +* Added new SQL functions `generateSnowflakeID` for generating Twitter-style Snowflake IDs.
[#63577](https://github.com/ClickHouse/ClickHouse/pull/63577) ([Danila Puzov](https://github.com/kazalika)). +* Add the ability to reshuffle rows during insert to optimize for size without violating the order set by `PRIMARY KEY`. It's controlled by the setting `optimize_row_order` (off by default). [#63578](https://github.com/ClickHouse/ClickHouse/pull/63578) ([Igor Markelov](https://github.com/ElderlyPassionFruit)). +* On Linux and MacOS, if the program has STDOUT redirected to a file with a compression extension, use the corresponding compression method instead of nothing (making it behave similarly to `INTO OUTFILE` ). [#63662](https://github.com/ClickHouse/ClickHouse/pull/63662) ([v01dXYZ](https://github.com/v01dXYZ)). +* Added `merge_workload` and `mutation_workload` settings to regulate how resources are utilized and shared between merges, mutations and other workloads. [#64061](https://github.com/ClickHouse/ClickHouse/pull/64061) ([Sergei Trifonov](https://github.com/serxa)). +* Change warning on high number of attached tables to differentiate tables, views and dictionaries. [#64180](https://github.com/ClickHouse/ClickHouse/pull/64180) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Add support for comparing IPv4 and IPv6 types using the `=` operator. [#64292](https://github.com/ClickHouse/ClickHouse/pull/64292) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Allow to store named collections in zookeeper. [#64574](https://github.com/ClickHouse/ClickHouse/pull/64574) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support decimal arguments in binary math functions (pow(), atan2(), max2, min2(), hypot(). [#64582](https://github.com/ClickHouse/ClickHouse/pull/64582) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Add support for index analysis over `hilbertEncode`. [#64662](https://github.com/ClickHouse/ClickHouse/pull/64662) ([Artem Mustafin](https://github.com/Artemmm91)). 
+* Added SQL functions `parseReadableSize` (along with `OrNull` and `OrZero` variants). [#64742](https://github.com/ClickHouse/ClickHouse/pull/64742) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Add server settings `max_table_num_to_throw` and `max_database_num_to_throw` to limit the number of databases or tables on `CREATE` queries. [#64781](https://github.com/ClickHouse/ClickHouse/pull/64781) ([Xu Jia](https://github.com/XuJia0210)). +* Add _time virtual column to file alike storages (s3/file/hdfs/url/azureBlobStorage). [#64947](https://github.com/ClickHouse/ClickHouse/pull/64947) ([Ilya Golshtein](https://github.com/ilejn)). +* Introduced new functions `base64URLEncode`, `base64URLDecode` and `tryBase64URLDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Add new function `editDistanceUTF8`, which calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. [#65269](https://github.com/ClickHouse/ClickHouse/pull/65269) ([LiuNeng](https://github.com/liuneng1994)). + +#### Performance Improvement +* Skip merging of newly created projection blocks during `INSERT`-s. [#59405](https://github.com/ClickHouse/ClickHouse/pull/59405) ([Nikita Taranov](https://github.com/nickitat)). +* Add a native parquet reader, which can read parquet binary to ClickHouse Columns directly. It's controlled by the setting `input_format_parquet_use_native_reader` (disabled by default). [#60361](https://github.com/ClickHouse/ClickHouse/pull/60361) ([ZhiHong Zhang](https://github.com/copperybean)). +* Reduce the number of virtual function calls in ColumnNullable::size(). [#60556](https://github.com/ClickHouse/ClickHouse/pull/60556) ([HappenLee](https://github.com/HappenLee)). +* Process string functions XXXUTF8 'asciily' if input strings are all ascii chars. Inspired by https://github.com/apache/doris/pull/29799. Overall speed up by 1.07x~1.62x. 
Notice that peak memory usage had been decreased in some cases. [#61632](https://github.com/ClickHouse/ClickHouse/pull/61632) ([李扬](https://github.com/taiyang-li)). +* Improved performance of selection (`{}`) globs in StorageS3. [#62120](https://github.com/ClickHouse/ClickHouse/pull/62120) ([Andrey Zvonov](https://github.com/zvonand)). +* HostResolver has each IP address several times. If a remote host has several IPs and for some reason (firewall rules, for example) access on some IPs is allowed and on others forbidden, then only the first record of the forbidden IPs is marked as failed, and in each try these IPs have a chance to be chosen (and fail again). Even if this is fixed, every 120 seconds the DNS cache is dropped, and the IPs can be chosen again. [#62652](https://github.com/ClickHouse/ClickHouse/pull/62652) ([Anton Ivashkin](https://github.com/ianton-ru)). +* Speedup `splitByRegexp` when the regular expression argument is a single-character. [#62696](https://github.com/ClickHouse/ClickHouse/pull/62696) ([Robert Schulze](https://github.com/rschu1ze)). +* Speed up FixedHashTable by keeping track of the min and max keys used. This allows to reduce the number of cells that need to be verified. [#62746](https://github.com/ClickHouse/ClickHouse/pull/62746) ([Jiebin Sun](https://github.com/jiebinn)). +* Add a new configuration `prefer_merge_sort_block_bytes` to control the memory usage and speed up sorting 2 times when merging when there are many columns. [#62904](https://github.com/ClickHouse/ClickHouse/pull/62904) ([LiuNeng](https://github.com/liuneng1994)). +* `clickhouse-local` will start faster. In previous versions, it was not deleting temporary directories by mistake. Now it will. This closes [#62941](https://github.com/ClickHouse/ClickHouse/issues/62941). [#63074](https://github.com/ClickHouse/ClickHouse/pull/63074) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Micro-optimizations for the new analyzer.
[#63429](https://github.com/ClickHouse/ClickHouse/pull/63429) ([Raúl Marín](https://github.com/Algunenano)). +* Index analysis will work if `DateTime` is compared to `DateTime64`. This closes [#63441](https://github.com/ClickHouse/ClickHouse/issues/63441). [#63443](https://github.com/ClickHouse/ClickHouse/pull/63443) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Index analysis will work if `DateTime` is compared to `DateTime64`. This closes [#63441](https://github.com/ClickHouse/ClickHouse/issues/63441). [#63532](https://github.com/ClickHouse/ClickHouse/pull/63532) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize the resolution of in(LowCardinality, ConstantSet). [#64060](https://github.com/ClickHouse/ClickHouse/pull/64060) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Speed up indices of type `set` a little (around 1.5 times) by removing garbage. [#64098](https://github.com/ClickHouse/ClickHouse/pull/64098) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Use a thread pool to initialize and destroy hash tables inside `ConcurrentHashJoin`. [#64241](https://github.com/ClickHouse/ClickHouse/pull/64241) ([Nikita Taranov](https://github.com/nickitat)). +* Optimized vertical merges in tables with sparse columns. [#64311](https://github.com/ClickHouse/ClickHouse/pull/64311) ([Anton Popov](https://github.com/CurtizJ)). +* Enabled prefetches of data from remote filesystem during vertical merges. It improves latency of vertical merges in tables with data stored on remote filesystem. [#64314](https://github.com/ClickHouse/ClickHouse/pull/64314) ([Anton Popov](https://github.com/CurtizJ)). +* Reduce redundant calls to `isDefault()` of `ColumnSparse::filter` to improve performance. [#64426](https://github.com/ClickHouse/ClickHouse/pull/64426) ([Jiebin Sun](https://github.com/jiebinn)). +* Speedup `find_super_nodes` and `find_big_family` keeper-client commands by making multiple asynchronous getChildren requests. 
[#64628](https://github.com/ClickHouse/ClickHouse/pull/64628) ([Alexander Gololobov](https://github.com/davenger)). +* Improve function least/greatest for nullable numeric type arguments. [#64668](https://github.com/ClickHouse/ClickHouse/pull/64668) ([KevinyhZou](https://github.com/KevinyhZou)). +* Allow merging two consecutive `FilterSteps` of a query plan. This improves filter-push-down optimization if the filter condition can be pushed down from the parent step. [#64760](https://github.com/ClickHouse/ClickHouse/pull/64760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove bad optimization in vertical final implementation and re-enable vertical final algorithm by default. [#64783](https://github.com/ClickHouse/ClickHouse/pull/64783) ([Duc Canh Le](https://github.com/canhld94)). +* Remove ALIAS nodes from the filter expression. This slightly improves performance for queries with `PREWHERE` (with new analyzer). [#64793](https://github.com/ClickHouse/ClickHouse/pull/64793) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix performance regression in cross join introduced in [#60459](https://github.com/ClickHouse/ClickHouse/issues/60459) (24.5). [#65243](https://github.com/ClickHouse/ClickHouse/pull/65243) ([Nikita Taranov](https://github.com/nickitat)). + +#### Improvement +* Support empty tuples. [#55061](https://github.com/ClickHouse/ClickHouse/pull/55061) ([Amos Bird](https://github.com/amosbird)). +* Hot reload storage policy for distributed tables when adding a new disk. [#58285](https://github.com/ClickHouse/ClickHouse/pull/58285) ([Duc Canh Le](https://github.com/canhld94)). +* Maps can now have `Float32`, `Float64`, `Array(T)`, `Map(K,V)` and `Tuple(T1, T2, ...)` as keys. Closes [#54537](https://github.com/ClickHouse/ClickHouse/issues/54537). [#59318](https://github.com/ClickHouse/ClickHouse/pull/59318) ([李扬](https://github.com/taiyang-li)).
+* Avoid possible deadlock during MergeTree index analysis when scheduling threads in a saturated service. [#59427](https://github.com/ClickHouse/ClickHouse/pull/59427) ([Sean Haynes](https://github.com/seandhaynes)). +* Multiline strings with border preservation and column width change. [#59940](https://github.com/ClickHouse/ClickHouse/pull/59940) ([Volodyachan](https://github.com/Volodyachan)). +* Make rabbitmq nack broken messages. Closes [#45350](https://github.com/ClickHouse/ClickHouse/issues/45350). [#60312](https://github.com/ClickHouse/ClickHouse/pull/60312) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support partial trivial count optimization when the query filter is able to select exact ranges from merge tree tables. [#60463](https://github.com/ClickHouse/ClickHouse/pull/60463) ([Amos Bird](https://github.com/amosbird)). +* Fix a crash in asynchronous stack unwinding (such as when using the sampling query profiler) while interpreting debug info. This closes [#60460](https://github.com/ClickHouse/ClickHouse/issues/60460). [#60468](https://github.com/ClickHouse/ClickHouse/pull/60468) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Reduce max memory usage of multithreaded `INSERT`s by collecting chunks of multiple threads in a single transform. [#61047](https://github.com/ClickHouse/ClickHouse/pull/61047) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Distinct messages for s3 error 'no key' for cases disk and storage. [#61108](https://github.com/ClickHouse/ClickHouse/pull/61108) ([Sema Checherinda](https://github.com/CheSema)). +* Less contention in filesystem cache (part 4). Allow to keep filesystem cache not filled to the limit by doing additional eviction in the background (controlled by `keep_free_space_size(elements)_ratio`). This allows to release pressure from space reservation for queries (on `tryReserve` method). Also this is done in a lock free way as much as possible, e.g. should not block normal cache usage. 
[#61250](https://github.com/ClickHouse/ClickHouse/pull/61250) ([Kseniia Sumarokova](https://github.com/kssenii)). +* The progress bar will work for trivial queries with LIMIT from `system.zeros`, `system.zeros_mt` (it already works for `system.numbers` and `system.numbers_mt`), and the `generateRandom` table function. As a bonus, if the total number of records is greater than the `max_rows_to_read` limit, it will throw an exception earlier. This closes [#58183](https://github.com/ClickHouse/ClickHouse/issues/58183). [#61823](https://github.com/ClickHouse/ClickHouse/pull/61823) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* YAML Merge Key support. [#62685](https://github.com/ClickHouse/ClickHouse/pull/62685) ([Azat Khuzhin](https://github.com/azat)). +* Enhance error message when non-deterministic function is used with Replicated source. [#62896](https://github.com/ClickHouse/ClickHouse/pull/62896) ([Grégoire Pineau](https://github.com/lyrixx)). +* Fix interserver secret for Distributed over Distributed from `remote`. [#63013](https://github.com/ClickHouse/ClickHouse/pull/63013) ([Azat Khuzhin](https://github.com/azat)). +* Allow using `clickhouse-local` and its shortcuts `clickhouse` and `ch` with a query or queries file as a positional argument. Examples: `ch "SELECT 1"`, `ch --param_test Hello "SELECT {test:String}"`, `ch query.sql`. This closes [#62361](https://github.com/ClickHouse/ClickHouse/issues/62361). [#63081](https://github.com/ClickHouse/ClickHouse/pull/63081) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support configuration substitutions from YAML files. [#63106](https://github.com/ClickHouse/ClickHouse/pull/63106) ([Eduard Karacharov](https://github.com/korowa)). +* Reduce the memory usage when using Azure object storage by using fixed memory allocation, avoiding the allocation of an extra buffer. [#63160](https://github.com/ClickHouse/ClickHouse/pull/63160) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). 
+* Add TTL information in the `system.parts_columns` table. [#63200](https://github.com/ClickHouse/ClickHouse/pull/63200) ([litlig](https://github.com/litlig)). +* Keep previous data in terminal after picking from skim suggestions. [#63261](https://github.com/ClickHouse/ClickHouse/pull/63261) ([FlameFactory](https://github.com/FlameFactory)). +* Width of fields is now calculated correctly, ignoring ANSI escape sequences. [#63270](https://github.com/ClickHouse/ClickHouse/pull/63270) ([Shaun Struwig](https://github.com/Blargian)). +* Enable plain_rewritable metadata for local and Azure (azure_blob_storage) object storages. [#63365](https://github.com/ClickHouse/ClickHouse/pull/63365) ([Julia Kartseva](https://github.com/jkartseva)). +* Support English-style Unicode quotes, e.g. “Hello”, ‘world’. This is questionable in general but helpful when you type your query in a word processor, such as Google Docs. This closes [#58634](https://github.com/ClickHouse/ClickHouse/issues/58634). [#63381](https://github.com/ClickHouse/ClickHouse/pull/63381) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allowed to create MaterializedMySQL database without connection to MySQL. [#63397](https://github.com/ClickHouse/ClickHouse/pull/63397) ([Kirill](https://github.com/kirillgarbar)). +* Remove copying data when writing to filesystem cache. [#63401](https://github.com/ClickHouse/ClickHouse/pull/63401) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update the usage of error code `NUMBER_OF_ARGUMENTS_DOESNT_MATCH` by more accurate error codes when appropriate. [#63406](https://github.com/ClickHouse/ClickHouse/pull/63406) ([Yohann Jardin](https://github.com/yohannj)). +* Several minor corner case fixes to proxy support & tunneling. [#63427](https://github.com/ClickHouse/ClickHouse/pull/63427) ([Arthur Passos](https://github.com/arthurpassos)). +* `os_user` and `client_hostname` are now correctly set up for queries for command line suggestions in clickhouse-client.
This closes [#63430](https://github.com/ClickHouse/ClickHouse/issues/63430). [#63433](https://github.com/ClickHouse/ClickHouse/pull/63433) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed tabulation from line numbering, correct handling of length when moving a line if the value has a tab, added tests. [#63493](https://github.com/ClickHouse/ClickHouse/pull/63493) ([Volodyachan](https://github.com/Volodyachan)). +* Add the `aggregate_function_group_array_has_limit_size` setting to support discarding data in some scenarios. [#63516](https://github.com/ClickHouse/ClickHouse/pull/63516) ([zhongyuankai](https://github.com/zhongyuankai)). +* Automatically mark a replica of Replicated database as lost and start recovery if some DDL task fails more than `max_retries_before_automatic_recovery` (100 by default) times in a row with the same error. Also, fixed a bug that could cause skipping DDL entries when an exception is thrown during an early stage of entry execution. [#63549](https://github.com/ClickHouse/ClickHouse/pull/63549) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add `http_response_headers` setting to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)). +* Automatically correct `max_block_size=0` to default value. [#63587](https://github.com/ClickHouse/ClickHouse/pull/63587) ([Antonio Andelic](https://github.com/antonio2368)). +* Account failed files in `s3queue_tracked_file_ttl_sec` and `s3queue_tracked_files_limit` for `StorageS3Queue`. [#63638](https://github.com/ClickHouse/ClickHouse/pull/63638) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add a build_id ALIAS column to trace_log to facilitate auto renaming upon detecting binary changes. This is to address [#52086](https://github.com/ClickHouse/ClickHouse/issues/52086).
[#63656](https://github.com/ClickHouse/ClickHouse/pull/63656) ([Zimu Li](https://github.com/woodlzm)). +* Enable truncate operation for object storage disks. [#63693](https://github.com/ClickHouse/ClickHouse/pull/63693) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Improve io_uring resubmits visibility. Rename profile event `IOUringSQEsResubmits` -> `IOUringSQEsResubmitsAsync` and add a new one `IOUringSQEsResubmitsSync`. [#63699](https://github.com/ClickHouse/ClickHouse/pull/63699) ([Tomer Shafir](https://github.com/tomershafir)). +* Introduce assertions to verify all functions are called with columns of the right size. [#63723](https://github.com/ClickHouse/ClickHouse/pull/63723) ([Raúl Marín](https://github.com/Algunenano)). +* The loading of the keywords list is now dependent on the server revision and will be disabled for the old versions of ClickHouse server. CC @azat. [#63786](https://github.com/ClickHouse/ClickHouse/pull/63786) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* `SHOW CREATE TABLE` executed on top of system tables will now show the super handy comment unique for each table which will explain why this table is needed. [#63788](https://github.com/ClickHouse/ClickHouse/pull/63788) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow trailing commas in the columns list in the INSERT query. For example, `INSERT INTO test (a, b, c, ) VALUES ...`. [#63803](https://github.com/ClickHouse/ClickHouse/pull/63803) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better exception messages for the `Regexp` format. [#63804](https://github.com/ClickHouse/ClickHouse/pull/63804) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow trailing commas in the `Values` format. For example, this query is allowed: `INSERT INTO test (a, b, c) VALUES (4, 5, 6,);`. [#63810](https://github.com/ClickHouse/ClickHouse/pull/63810) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* ClickHouse disks have to read server setting to obtain actual metadata format version. [#63831](https://github.com/ClickHouse/ClickHouse/pull/63831) ([Sema Checherinda](https://github.com/CheSema)). +* Disable pretty format restrictions (`output_format_pretty_max_rows`/`output_format_pretty_max_value_width`) when stdout is not TTY. [#63942](https://github.com/ClickHouse/ClickHouse/pull/63942) ([Azat Khuzhin](https://github.com/azat)). +* Exception handling now works when ClickHouse is used inside AWS Lambda. Author: [Alexey Coolnev](https://github.com/acoolnev). [#64014](https://github.com/ClickHouse/ClickHouse/pull/64014) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Throw `CANNOT_DECOMPRESS` instead of `CORRUPTED_DATA` on invalid compressed data passed via HTTP. [#64036](https://github.com/ClickHouse/ClickHouse/pull/64036) ([vdimir](https://github.com/vdimir)). +* A tip for a single large number in Pretty formats now works for Nullable and LowCardinality. This closes [#61993](https://github.com/ClickHouse/ClickHouse/issues/61993). [#64084](https://github.com/ClickHouse/ClickHouse/pull/64084) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now backups with azure blob storage will use multicopy. [#64116](https://github.com/ClickHouse/ClickHouse/pull/64116) ([alesapin](https://github.com/alesapin)). +* Added a new setting, `metadata_keep_free_space_bytes` to keep free space on the metadata storage disk. [#64128](https://github.com/ClickHouse/ClickHouse/pull/64128) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Add metrics, logs, and thread names around parts filtering with indices. [#64130](https://github.com/ClickHouse/ClickHouse/pull/64130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow to use native copy for azure even with different containers. [#64154](https://github.com/ClickHouse/ClickHouse/pull/64154) ([alesapin](https://github.com/alesapin)).
+* Add metrics to track the number of directories created and removed by the plain_rewritable metadata storage, and the number of entries in the local-to-remote in-memory map. [#64175](https://github.com/ClickHouse/ClickHouse/pull/64175) ([Julia Kartseva](https://github.com/jkartseva)). +* Finally enable native copy for azure. [#64182](https://github.com/ClickHouse/ClickHouse/pull/64182) ([alesapin](https://github.com/alesapin)). +* Ignore `allow_suspicious_primary_key` on `ATTACH` and verify on `ALTER`. [#64202](https://github.com/ClickHouse/ClickHouse/pull/64202) ([Azat Khuzhin](https://github.com/azat)). +* The query cache now considers identical queries with different settings as different. This increases robustness in cases where different settings (e.g. `limit` or `additional_table_filters`) would affect the query result. [#64205](https://github.com/ClickHouse/ClickHouse/pull/64205) ([Robert Schulze](https://github.com/rschu1ze)). +* Better Exception Message in Delete Table with Projection, users can understand the error and the steps should be taken. [#64212](https://github.com/ClickHouse/ClickHouse/pull/64212) ([jsc0218](https://github.com/jsc0218)). +* Support the non standard error code `QpsLimitExceeded` in object storage as a retryable error. [#64225](https://github.com/ClickHouse/ClickHouse/pull/64225) ([Sema Checherinda](https://github.com/CheSema)). +* Forbid converting a MergeTree table to replicated if the zookeeper path for this table already exists. [#64244](https://github.com/ClickHouse/ClickHouse/pull/64244) ([Kirill](https://github.com/kirillgarbar)). +* If "replica group" is configured for a `Replicated` database, automatically create a cluster that includes replicas from all groups. [#64312](https://github.com/ClickHouse/ClickHouse/pull/64312) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Added settings to disable materialization of skip indexes and statistics on inserts (`materialize_skip_indexes_on_insert` and `materialize_statistics_on_insert`). [#64391](https://github.com/ClickHouse/ClickHouse/pull/64391) ([Anton Popov](https://github.com/CurtizJ)). +* Use the allocated memory size to calculate the row group size and reduce the peak memory of the parquet writer in single-threaded mode. [#64424](https://github.com/ClickHouse/ClickHouse/pull/64424) ([LiuNeng](https://github.com/liuneng1994)). +* Added new configuration input_format_parquet_prefer_block_bytes to control the average output block bytes, and modified the default value of input_format_parquet_max_block_size to 65409. [#64427](https://github.com/ClickHouse/ClickHouse/pull/64427) ([LiuNeng](https://github.com/liuneng1994)). +* Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)). +* Settings from user config doesn't affect merges and mutations for MergeTree on top of object storage. [#64456](https://github.com/ClickHouse/ClickHouse/pull/64456) ([alesapin](https://github.com/alesapin)). +* Setting `replace_long_file_name_to_hash` is enabled by default for `MergeTree` tables. [#64457](https://github.com/ClickHouse/ClickHouse/pull/64457) ([Anton Popov](https://github.com/CurtizJ)). +* Improve the iterator of sparse column to reduce call of size(). [#64497](https://github.com/ClickHouse/ClickHouse/pull/64497) ([Jiebin Sun](https://github.com/jiebinn)). +* Update condition to use copy for azure blob storage. [#64518](https://github.com/ClickHouse/ClickHouse/pull/64518) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Support the non standard error code `TotalQpsLimitExceeded` in object storage as a retryable error. [#64520](https://github.com/ClickHouse/ClickHouse/pull/64520) ([Sema Checherinda](https://github.com/CheSema)). 
+* Optimized memory usage of vertical merges for tables with high number of skip indexes. [#64580](https://github.com/ClickHouse/ClickHouse/pull/64580) ([Anton Popov](https://github.com/CurtizJ)). +* Introduced two additional columns in the `system.query_log`: `used_privileges` and `missing_privileges`. `used_privileges` is populated with the privileges that were checked during query execution, and `missing_privileges` contains required privileges that are missing. [#64597](https://github.com/ClickHouse/ClickHouse/pull/64597) ([Alexey Katsman](https://github.com/alexkats)). +* Add settings `parallel_replicas_custom_key_range_lower` and `parallel_replicas_custom_key_range_upper` to control how parallel replicas with dynamic shards parallelizes queries when using a range filter. [#64604](https://github.com/ClickHouse/ClickHouse/pull/64604) ([josh-hildred](https://github.com/josh-hildred)). +* Updated Advanced Dashboard for both open-source and ClickHouse Cloud versions to include a chart for 'Maximum concurrent network connections'. [#64610](https://github.com/ClickHouse/ClickHouse/pull/64610) ([Thom O'Connor](https://github.com/thomoco)). +* The second argument (scale) of functions `round()`, `roundBankers()`, `floor()`, `ceil()` and `trunc()` can now be non-const. [#64798](https://github.com/ClickHouse/ClickHouse/pull/64798) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Improve progress report on zeros_mt and generateRandom. [#64804](https://github.com/ClickHouse/ClickHouse/pull/64804) ([Raúl Marín](https://github.com/Algunenano)). +* Add an asynchronous metric jemalloc.profile.active to show whether sampling is currently active. This is an activation mechanism in addition to prof.active; both must be active for the calling thread to sample. [#64842](https://github.com/ClickHouse/ClickHouse/pull/64842) ([Unalian](https://github.com/Unalian)). +* Support statistics with ReplicatedMergeTree. 
[#64934](https://github.com/ClickHouse/ClickHouse/pull/64934) ([Han Fei](https://github.com/hanfei1991)). +* Don't mark `allow_experimental_join_condition` as IMPORTANT. This may have prevented distributed queries in a mixed versions cluster from being executed successfully. [#65008](https://github.com/ClickHouse/ClickHouse/pull/65008) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Backported in [#65716](https://github.com/ClickHouse/ClickHouse/issues/65716): `StorageS3Queue` related fixes and improvements. Deduce a default value of `s3queue_processing_threads_num` according to the number of physical cpu cores on the server (instead of the previous default value as 1). Set default value of `s3queue_loading_retries` to 10. Fix possible vague "Uncaught exception" in exception column of `system.s3queue`. Do not increment retry count on `MEMORY_LIMIT_EXCEEDED` exception. Move files commit to a stage after insertion into table fully finished to avoid files being committed while not inserted. Add settings `s3queue_max_processed_files_before_commit`, `s3queue_max_processed_rows_before_commit`, `s3queue_max_processed_bytes_before_commit`, `s3queue_max_processing_time_sec_before_commit`, to better control commit and flush time. [#65046](https://github.com/ClickHouse/ClickHouse/pull/65046) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added server Asynchronous metrics `DiskGetObjectThrottler*` and `DiskPutObjectThrottler*` reflecting request per second rate limit defined with `s3_max_get_rps` and `s3_max_put_rps` disk settings and currently available number of requests that could be sent without hitting throttling limit on the disk. Metrics are defined for every disk that has a configured limit. [#65050](https://github.com/ClickHouse/ClickHouse/pull/65050) ([Sergei Trifonov](https://github.com/serxa)).
+* Added a setting `output_format_pretty_display_footer_column_names` which when enabled displays column names at the end of the table for long tables (50 rows by default), with the threshold value for minimum number of rows controlled by `output_format_pretty_display_footer_column_names_min_rows`. [#65144](https://github.com/ClickHouse/ClickHouse/pull/65144) ([Shaun Struwig](https://github.com/Blargian)). +* Returned back the behaviour of how ClickHouse works and interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Initialize global trace collector for Poco::ThreadPool (needed for keeper, etc). [#65239](https://github.com/ClickHouse/ClickHouse/pull/65239) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add validation when creating a user with bcrypt_hash. [#65242](https://github.com/ClickHouse/ClickHouse/pull/65242) ([Raúl Marín](https://github.com/Algunenano)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) +* Fix a permission error where a user in a specific situation can escalate their privileges on the default database without necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)). +* Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)). +* Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)). 
+* Forbid `QUALIFY` clause in the old analyzer. The old analyzer ignored `QUALIFY`, so it could lead to unexpected data removal in mutations. [#65356](https://github.com/ClickHouse/ClickHouse/pull/65356) ([Dmitry Novik](https://github.com/novikd)). +* Use correct memory alignment for Distinct combinator. Previously, crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#65846](https://github.com/ClickHouse/ClickHouse/issues/65846): Check cyclic dependencies on CREATE/REPLACE/RENAME/EXCHANGE queries and throw an exception if there is a cyclic dependency. Previously such cyclic dependencies could lead to a deadlock during server startup. Closes [#65355](https://github.com/ClickHouse/ClickHouse/issues/65355). Also fix some bugs in dependencies creation. [#65405](https://github.com/ClickHouse/ClickHouse/pull/65405) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#65714](https://github.com/ClickHouse/ClickHouse/issues/65714): Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fix making backup when multiple shards are used. This PR fixes [#56566](https://github.com/ClickHouse/ClickHouse/issues/56566). [#57684](https://github.com/ClickHouse/ClickHouse/pull/57684) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix passing projections/indexes from CREATE query into inner table of MV. [#59183](https://github.com/ClickHouse/ClickHouse/pull/59183) ([Azat Khuzhin](https://github.com/azat)). +* Fix boundRatio incorrect merge. [#60532](https://github.com/ClickHouse/ClickHouse/pull/60532) ([Tao Wang](https://github.com/wangtZJU)). +* Fix crash when using some functions with low-cardinality columns. 
[#61966](https://github.com/ClickHouse/ClickHouse/pull/61966) ([Michael Kolupaev](https://github.com/al13n321)). +* Fixed 'set' skip index not working with IN and indexHint(). [#62083](https://github.com/ClickHouse/ClickHouse/pull/62083) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix queries with FINAL give wrong result when table does not use adaptive granularity. [#62432](https://github.com/ClickHouse/ClickHouse/pull/62432) ([Duc Canh Le](https://github.com/canhld94)). +* Improve the detection of cgroups v2 memory controller in unusual locations. This fixes a warning that the cgroup memory observer was disabled because no cgroups v1 or v2 current memory file could be found. [#62903](https://github.com/ClickHouse/ClickHouse/pull/62903) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix subsequent use of external tables in client. [#62964](https://github.com/ClickHouse/ClickHouse/pull/62964) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash with untuple and unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)). +* Fix bug which could lead to server to accept connections before server is actually loaded. [#63181](https://github.com/ClickHouse/ClickHouse/pull/63181) ([alesapin](https://github.com/alesapin)). +* Fix intersect parts when restart after drop range. [#63202](https://github.com/ClickHouse/ClickHouse/pull/63202) ([Han Fei](https://github.com/hanfei1991)). +* Fix a misbehavior when SQL security defaults don't load for old tables during server startup. [#63209](https://github.com/ClickHouse/ClickHouse/pull/63209) ([pufit](https://github.com/pufit)). +* JOIN filter push down filled join fix. Closes [#63228](https://github.com/ClickHouse/ClickHouse/issues/63228). [#63234](https://github.com/ClickHouse/ClickHouse/pull/63234) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix infinite loop while listing objects in Azure blob storage. 
[#63257](https://github.com/ClickHouse/ClickHouse/pull/63257) ([Julia Kartseva](https://github.com/jkartseva)). +* CROSS join can be executed with any value `join_algorithm` setting, close [#62431](https://github.com/ClickHouse/ClickHouse/issues/62431). [#63273](https://github.com/ClickHouse/ClickHouse/pull/63273) ([vdimir](https://github.com/vdimir)). +* Fixed a potential crash caused by a `no space left` error when temporary data in the cache is used. [#63346](https://github.com/ClickHouse/ClickHouse/pull/63346) ([vdimir](https://github.com/vdimir)). +* Fix bug which could potentially lead to rare LOGICAL_ERROR during SELECT query with message: `Unexpected return type from materialize. Expected type_XXX. Got type_YYY.` Introduced in [#59379](https://github.com/ClickHouse/ClickHouse/issues/59379). [#63353](https://github.com/ClickHouse/ClickHouse/pull/63353) ([alesapin](https://github.com/alesapin)). +* Fix `X-ClickHouse-Timezone` header returning wrong timezone when using `session_timezone` as query level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix debug assert when using grouping WITH ROLLUP and LowCardinality types. [#63398](https://github.com/ClickHouse/ClickHouse/pull/63398) ([Raúl Marín](https://github.com/Algunenano)). +* Fix logical errors in queries with `GROUPING SETS` and `WHERE` and `group_by_use_nulls = true`, close [#60538](https://github.com/ClickHouse/ClickHouse/issues/60538). [#63405](https://github.com/ClickHouse/ClickHouse/pull/63405) ([vdimir](https://github.com/vdimir)). +* Fix backup of projection part in case projection was removed from table metadata, but part still has projection. [#63426](https://github.com/ClickHouse/ClickHouse/pull/63426) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix 'Every derived table must have its own alias' error for MYSQL dictionary source, close [#63341](https://github.com/ClickHouse/ClickHouse/issues/63341). 
[#63481](https://github.com/ClickHouse/ClickHouse/pull/63481) ([vdimir](https://github.com/vdimir)). +* Insert QueryFinish on AsyncInsertFlush with no data. [#63483](https://github.com/ClickHouse/ClickHouse/pull/63483) ([Raúl Marín](https://github.com/Algunenano)). +* Fix `system.query_log.used_dictionaries` logging. [#63487](https://github.com/ClickHouse/ClickHouse/pull/63487) ([Eduard Karacharov](https://github.com/korowa)). +* Support executing function during assignment of parameterized view value. [#63502](https://github.com/ClickHouse/ClickHouse/pull/63502) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Avoid segfault in `MergeTreePrefetchedReadPool` while fetching projection parts. [#63513](https://github.com/ClickHouse/ClickHouse/pull/63513) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix rabbitmq heap-use-after-free found by clang-18, which can happen if an error is thrown from RabbitMQ during initialization of exchange and queues. [#63515](https://github.com/ClickHouse/ClickHouse/pull/63515) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix crash on exit with sentry enabled (due to openssl destroyed before sentry). [#63548](https://github.com/ClickHouse/ClickHouse/pull/63548) ([Azat Khuzhin](https://github.com/azat)). +* Fixed parquet memory tracking. [#63584](https://github.com/ClickHouse/ClickHouse/pull/63584) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix support for Array and Map with Keyed hashing functions and materialized keys. [#63628](https://github.com/ClickHouse/ClickHouse/pull/63628) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fixed Parquet filter pushdown not working with Analyzer. [#63642](https://github.com/ClickHouse/ClickHouse/pull/63642) ([Michael Kolupaev](https://github.com/al13n321)). +* It is forbidden to convert MergeTree to replicated if the zookeeper path for this table already exists.
[#63670](https://github.com/ClickHouse/ClickHouse/pull/63670) ([Kirill](https://github.com/kirillgarbar)). +* Read only the necessary columns from VIEW (new analyzer). Closes [#62594](https://github.com/ClickHouse/ClickHouse/issues/62594). [#63688](https://github.com/ClickHouse/ClickHouse/pull/63688) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix rare case with missing data in the result of distributed query. [#63691](https://github.com/ClickHouse/ClickHouse/pull/63691) ([vdimir](https://github.com/vdimir)). +* Fix [#63539](https://github.com/ClickHouse/ClickHouse/issues/63539). Forbid WINDOW redefinition in new analyzer. [#63694](https://github.com/ClickHouse/ClickHouse/pull/63694) ([Dmitry Novik](https://github.com/novikd)). +* Flatten_nested is broken with replicated database. [#63695](https://github.com/ClickHouse/ClickHouse/pull/63695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `SIZES_OF_COLUMNS_DOESNT_MATCH` error for queries with `arrayJoin` function in `WHERE`. Fixes [#63653](https://github.com/ClickHouse/ClickHouse/issues/63653). [#63722](https://github.com/ClickHouse/ClickHouse/pull/63722) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `Not found column` and `CAST AS Map from array requires nested tuple of 2 elements` exceptions for distributed queries which use `Map(Nothing, Nothing)` type. Fixes [#63637](https://github.com/ClickHouse/ClickHouse/issues/63637). [#63753](https://github.com/ClickHouse/ClickHouse/pull/63753) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible `ILLEGAL_COLUMN` error in `partial_merge` join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#63755](https://github.com/ClickHouse/ClickHouse/pull/63755) ([vdimir](https://github.com/vdimir)). +* `query_plan_remove_redundant_distinct` can break queries with WINDOW FUNCTIONS (with `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). 
[#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix possible crash with SYSTEM UNLOAD PRIMARY KEY. [#63778](https://github.com/ClickHouse/ClickHouse/pull/63778) ([Raúl Marín](https://github.com/Algunenano)). +* Fix a query with a duplicating cycling alias. Fixes [#63320](https://github.com/ClickHouse/ClickHouse/issues/63320). [#63791](https://github.com/ClickHouse/ClickHouse/pull/63791) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed performance degradation of parsing data formats in INSERT query. This closes [#62918](https://github.com/ClickHouse/ClickHouse/issues/62918). This partially reverts [#42284](https://github.com/ClickHouse/ClickHouse/issues/42284), which breaks the original design and introduces more problems. [#63801](https://github.com/ClickHouse/ClickHouse/pull/63801) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add 'endpoint_subpath' S3 URI setting to allow plain_rewritable disks to share the same endpoint. [#63806](https://github.com/ClickHouse/ClickHouse/pull/63806) ([Julia Kartseva](https://github.com/jkartseva)). +* Fix queries using parallel read buffer (e.g. with max_download_thread > 0) getting stuck when threads cannot be allocated. [#63814](https://github.com/ClickHouse/ClickHouse/pull/63814) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow JOIN filter push down to both streams if only single equivalent column is used in query. Closes [#63799](https://github.com/ClickHouse/ClickHouse/issues/63799). [#63819](https://github.com/ClickHouse/ClickHouse/pull/63819) ([Maksim Kita](https://github.com/kitaisreal)). +* Remove the data from all disks after DROP with the Lazy database engines. Without these changes, orphaned data will remain on the disks. [#63848](https://github.com/ClickHouse/ClickHouse/pull/63848) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* Fix incorrect select query result when parallel replicas were used to read from a Materialized View. [#63861](https://github.com/ClickHouse/ClickHouse/pull/63861) ([Nikita Taranov](https://github.com/nickitat)). +* Fixes in `find_super_nodes` and `find_big_family` command of keeper-client: - do not fail on ZNONODE errors - find super nodes inside super nodes - properly calculate subtree node count. [#63862](https://github.com/ClickHouse/ClickHouse/pull/63862) ([Alexander Gololobov](https://github.com/davenger)). +* Fix an error `Database name is empty` for remote queries with lambdas over the cluster with modified default database. Fixes [#63471](https://github.com/ClickHouse/ClickHouse/issues/63471). [#63864](https://github.com/ClickHouse/ClickHouse/pull/63864) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix SIGSEGV due to CPU/Real (`query_profiler_real_time_period_ns`/`query_profiler_cpu_time_period_ns`) profiler (has been an issue since 2022, that leads to periodic server crashes, especially if you were using distributed engine). [#63865](https://github.com/ClickHouse/ClickHouse/pull/63865) ([Azat Khuzhin](https://github.com/azat)). +* Fixed `EXPLAIN CURRENT TRANSACTION` query. [#63926](https://github.com/ClickHouse/ClickHouse/pull/63926) ([Anton Popov](https://github.com/CurtizJ)). +* Fix analyzer - IN function with arbitrary deep sub-selects in materialized view to use insertion block. [#63930](https://github.com/ClickHouse/ClickHouse/pull/63930) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Allow `ALTER TABLE .. MODIFY|RESET SETTING` and `ALTER TABLE .. MODIFY COMMENT` for plain_rewritable disk. [#63933](https://github.com/ClickHouse/ClickHouse/pull/63933) ([Julia Kartseva](https://github.com/jkartseva)). +* Fix Recursive CTE with distributed queries. Closes [#63790](https://github.com/ClickHouse/ClickHouse/issues/63790).
[#63939](https://github.com/ClickHouse/ClickHouse/pull/63939) ([Maksim Kita](https://github.com/kitaisreal)). +* Fixed reading of columns of type `Tuple(Map(LowCardinality(String), String), ...)`. [#63956](https://github.com/ClickHouse/ClickHouse/pull/63956) ([Anton Popov](https://github.com/CurtizJ)). +* Fix resolve of unqualified COLUMNS matcher. Preserve the input columns order and forbid usage of unknown identifiers. [#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) ([Dmitry Novik](https://github.com/novikd)). +* Fix the `Not found column` error for queries with `skip_unused_shards = 1`, `LIMIT BY`, and the new analyzer. Fixes [#63943](https://github.com/ClickHouse/ClickHouse/issues/63943). [#63983](https://github.com/ClickHouse/ClickHouse/pull/63983) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* (Low-quality third-party Kusto Query Language). Resolve Client Abortion Issue When Using KQL Table Function in Interactive Mode. [#63992](https://github.com/ClickHouse/ClickHouse/pull/63992) ([Yong Wang](https://github.com/kashwy)). +* Fix a `Cyclic aliases` error for cyclic aliases of different type (expression and function). [#63993](https://github.com/ClickHouse/ClickHouse/pull/63993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)). +* Do not throw `Storage doesn't support FINAL` error for remote queries over non-MergeTree tables with `final = true` and new analyzer. Fixes [#63960](https://github.com/ClickHouse/ClickHouse/issues/63960). [#64037](https://github.com/ClickHouse/ClickHouse/pull/64037) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add missing settings to recoverLostReplica. [#64040](https://github.com/ClickHouse/ClickHouse/pull/64040) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix unwind on SIGSEGV on aarch64 (due to small stack for signal). [#64058](https://github.com/ClickHouse/ClickHouse/pull/64058) ([Azat Khuzhin](https://github.com/azat)). +* This fix will use a proper redefined context with the correct definer for each individual view in the query pipeline. [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)). +* Fix analyzer: "Not found column" error is fixed when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix azure backup writing multipart blocks as 1mb (read buffer size) instead of max_upload_part_size. [#64117](https://github.com/ClickHouse/ClickHouse/pull/64117) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)). +* Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)). +* The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)). +* Ignore `text_log` config when using Keeper. [#64218](https://github.com/ClickHouse/ClickHouse/pull/64218) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `duplicate alias` error for distributed queries with `ARRAY JOIN`. 
[#64226](https://github.com/ClickHouse/ClickHouse/pull/64226) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix unexpected accurateCast from string to integer. [#64255](https://github.com/ClickHouse/ClickHouse/pull/64255) ([wudidapaopao](https://github.com/wudidapaopao)). +* Fixed CNF simplification, in case any OR group contains mutually exclusive atoms. [#64256](https://github.com/ClickHouse/ClickHouse/pull/64256) ([Eduard Karacharov](https://github.com/korowa)). +* Fix Query Tree size validation. [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)). +* Fix `Logical error: Bad cast` for `Buffer` table with `PREWHERE`. [#64388](https://github.com/ClickHouse/ClickHouse/pull/64388) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Prevent recursive logging in `blob_storage_log` when it's stored on object storage. [#64393](https://github.com/ClickHouse/ClickHouse/pull/64393) ([vdimir](https://github.com/vdimir)). +* Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed `optimize_read_in_order` behaviour for ORDER BY ... NULLS FIRST / LAST on tables with nullable keys. [#64483](https://github.com/ClickHouse/ClickHouse/pull/64483) ([Eduard Karacharov](https://github.com/korowa)). +* Fix the `Expression nodes list expected 1 projection names` and `Unknown expression or identifier` errors for queries with aliases to `GLOBAL IN.`. [#64517](https://github.com/ClickHouse/ClickHouse/pull/64517) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fixed ORC statistics calculation, when writing, for unsigned types on all platforms and Int8 on ARM. [#64563](https://github.com/ClickHouse/ClickHouse/pull/64563) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix the crash loop when restoring from backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)). +* Fix the output of function `formatDateTimeInJodaSyntax` when a formatter generates an uneven number of characters and the last character is `0`. For example, `SELECT formatDateTimeInJodaSyntax(toDate('2012-05-29'), 'D')` now correctly returns `150` instead of previously `15`. [#64614](https://github.com/ClickHouse/ClickHouse/pull/64614) ([LiuNeng](https://github.com/liuneng1994)). +* Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)). +* Fix type inference for float (in case of small buffer, i.e. `--max_read_buffer_size 1`). [#64641](https://github.com/ClickHouse/ClickHouse/pull/64641) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug which could lead to non-working TTLs with expressions. [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)). +* Fix removing the `WHERE` and `PREWHERE` expressions, which are always true (for the new analyzer). [#64695](https://github.com/ClickHouse/ClickHouse/pull/64695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed excessive part elimination by token-based text indexes (`ngrambf` , `full_text`) when filtering by result of `startsWith`, `endsWith`, `match`, `multiSearchAny`. [#64720](https://github.com/ClickHouse/ClickHouse/pull/64720) ([Eduard Karacharov](https://github.com/korowa)). +* Fixes incorrect behaviour of ANSI CSI escaping in the `UTF8::computeWidth` function. 
[#64756](https://github.com/ClickHouse/ClickHouse/pull/64756) ([Shaun Struwig](https://github.com/Blargian)). +* Fix a case of incorrect removal of `ORDER BY` / `LIMIT BY` across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)). +* Fix (experimental) unequal join with subqueries for sets which are in the mixed join conditions. [#64775](https://github.com/ClickHouse/ClickHouse/pull/64775) ([lgbo](https://github.com/lgbo-ustc)). +* Fix crash in a local cache over `plain_rewritable` disk. [#64778](https://github.com/ClickHouse/ClickHouse/pull/64778) ([Julia Kartseva](https://github.com/jkartseva)). +* Keeper fix: return correct value for `zk_latest_snapshot_size` in `mntr` command. [#64784](https://github.com/ClickHouse/ClickHouse/pull/64784) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `Cannot find column` in distributed query with `ARRAY JOIN` by `Nested` column. Fixes [#64755](https://github.com/ClickHouse/ClickHouse/issues/64755). [#64801](https://github.com/ClickHouse/ClickHouse/pull/64801) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix memory leak in slru cache policy. [#64803](https://github.com/ClickHouse/ClickHouse/pull/64803) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via http protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)). +* Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from the materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fix rare crash when table has TTL with subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)). +* Fix duplicating `Delete` events in `blob_storage_log` in case of large batch to delete. [#64924](https://github.com/ClickHouse/ClickHouse/pull/64924) ([vdimir](https://github.com/vdimir)). +* Backported in [#65544](https://github.com/ClickHouse/ClickHouse/issues/65544): Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)). +* Fixed `Session moved to another server` error from [Zoo]Keeper that might happen after server startup when the config has includes from [Zoo]Keeper. [#64986](https://github.com/ClickHouse/ClickHouse/pull/64986) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#65582](https://github.com/ClickHouse/ClickHouse/issues/65582): Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix `host_id` in DatabaseReplicated when `cluster_secure_connection` parameter is enabled. Previously all the connections within the cluster created by DatabaseReplicated were not secure, even if the parameter was enabled. [#65054](https://github.com/ClickHouse/ClickHouse/pull/65054) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. 
[#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid writing to finalized buffer in File-like storages. [#65063](https://github.com/ClickHouse/ClickHouse/pull/65063) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible infinite query duration in case of cyclic aliases. Fixes [#64849](https://github.com/ClickHouse/ClickHouse/issues/64849). [#65081](https://github.com/ClickHouse/ClickHouse/pull/65081) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix pushing arithmetic operations out of aggregation. In the new analyzer, optimization was applied only once. [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)). +* Fix aggregate function name rewriting in the new analyzer. [#65110](https://github.com/ClickHouse/ClickHouse/pull/65110) ([Dmitry Novik](https://github.com/novikd)). +* Respond with 5xx instead of 200 OK in case of receive timeout while reading (parts of) the request body from the client socket. [#65118](https://github.com/ClickHouse/ClickHouse/pull/65118) ([Julian Maicher](https://github.com/jmaicher)). +* Backported in [#65734](https://github.com/ClickHouse/ClickHouse/issues/65734): Eliminate injective function in argument of functions `uniq*` recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)). +* Fix possible crash for hedged requests. [#65206](https://github.com/ClickHouse/ClickHouse/pull/65206) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix the bug in Hashed and Hashed_Array dictionary short circuit evaluation, which may read uninitialized number, leading to various errors. [#65256](https://github.com/ClickHouse/ClickHouse/pull/65256) ([jsc0218](https://github.com/jsc0218)). +* This PR ensures that the type of the constant(IN operator's second parameter) is always visible during the IN operator's type conversion process. Otherwise, losing type information may cause some conversions to fail, such as the conversion from DateTime to Date. fix ([#64487](https://github.com/ClickHouse/ClickHouse/issues/64487)). [#65315](https://github.com/ClickHouse/ClickHouse/pull/65315) ([pn](https://github.com/chloro-pn)). +* Backported in [#65665](https://github.com/ClickHouse/ClickHouse/issues/65665): Disable `non-intersecting-parts` optimization for queries with `FINAL` in case of `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65606](https://github.com/ClickHouse/ClickHouse/issues/65606): Fix getting exception `Index out of bound for blob metadata` in case all files from list batch were filtered out. [#65523](https://github.com/ClickHouse/ClickHouse/pull/65523) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#65790](https://github.com/ClickHouse/ClickHouse/issues/65790): Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)). 
+* Backported in [#65814](https://github.com/ClickHouse/ClickHouse/issues/65814): Fix invalid exceptions in function `parseDateTime` with `%F` and `%D` placeholders. [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#65830](https://github.com/ClickHouse/ClickHouse/issues/65830): Fix a bug in short circuit logic when the old analyzer and `dictGetOrDefault` are used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)). + +#### Build/Testing/Packaging Improvement +* ClickHouse is built with clang-18. A lot of new checks from clang-tidy-18 have been enabled. [#60469](https://github.com/ClickHouse/ClickHouse/pull/60469) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Require the `network` service when using the rc init script to start the ClickHouse server daemon. [#60650](https://github.com/ClickHouse/ClickHouse/pull/60650) ([Chun-Sheng, Li](https://github.com/peter279k)). +* Re-enable broken s390x build in CI. [#63135](https://github.com/ClickHouse/ClickHouse/pull/63135) ([Harry Lee](https://github.com/HarryLeeIBM)). +* The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Information about every symbol in every translation unit will be collected in the CI database for every build in the CI. This closes [#63494](https://github.com/ClickHouse/ClickHouse/issues/63494). [#63495](https://github.com/ClickHouse/ClickHouse/pull/63495) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Experimentally support loongarch64 as a new platform for ClickHouse. [#63733](https://github.com/ClickHouse/ClickHouse/pull/63733) ([qiangxuhui](https://github.com/qiangxuhui)). +* Update Apache Datasketches library.
It resolves [#63858](https://github.com/ClickHouse/ClickHouse/issues/63858). [#63923](https://github.com/ClickHouse/ClickHouse/pull/63923) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable GRPC support for aarch64 linux while cross-compiling binary. [#64072](https://github.com/ClickHouse/ClickHouse/pull/64072) ([alesapin](https://github.com/alesapin)). +* Fix typo in test_hdfsCluster_unset_skip_unavailable_shards. The test writes data to unskip_unavailable_shards, but uses skip_unavailable_shards from the previous test. [#64243](https://github.com/ClickHouse/ClickHouse/pull/64243) ([Mikhail Artemenko](https://github.com/Michicosun)). +* Reduce the size of some slow tests. [#64387](https://github.com/ClickHouse/ClickHouse/pull/64387) ([Raúl Marín](https://github.com/Algunenano)). +* Reduce the size of some slow tests. [#64452](https://github.com/ClickHouse/ClickHouse/pull/64452) ([Raúl Marín](https://github.com/Algunenano)). +* Fix test_lost_part_other_replica. [#64512](https://github.com/ClickHouse/ClickHouse/pull/64512) ([Raúl Marín](https://github.com/Algunenano)). +* Add tests for experimental unequal joins and randomize new settings in clickhouse-test. [#64535](https://github.com/ClickHouse/ClickHouse/pull/64535) ([Nikita Fomichev](https://github.com/fm4v)). +* Upgrade tests: Update config and work with release candidates. [#64542](https://github.com/ClickHouse/ClickHouse/pull/64542) ([Raúl Marín](https://github.com/Algunenano)). +* Add support for LLVM XRay. [#64592](https://github.com/ClickHouse/ClickHouse/pull/64592) ([Tomer Shafir](https://github.com/tomershafir)). +* Speed up 02995_forget_partition. [#64761](https://github.com/ClickHouse/ClickHouse/pull/64761) ([Raúl Marín](https://github.com/Algunenano)). +* Fix 02790_async_queries_in_query_log. [#64764](https://github.com/ClickHouse/ClickHouse/pull/64764) ([Raúl Marín](https://github.com/Algunenano)). +* Support LLVM XRay on Linux amd64 only. 
[#64837](https://github.com/ClickHouse/ClickHouse/pull/64837) ([Tomer Shafir](https://github.com/tomershafir)). +* Get rid of custom code in `tests/ci/download_release_packages.py` and `tests/ci/get_previous_release_tag.py` to avoid issues after https://github.com/ClickHouse/ClickHouse/pull/64759 is merged. [#64848](https://github.com/ClickHouse/ClickHouse/pull/64848) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Reduce the size of the `unit-test` image several-fold. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### NO CL CATEGORY + +* Backported in [#65568](https://github.com/ClickHouse/ClickHouse/issues/65568):. [#65498](https://github.com/ClickHouse/ClickHouse/pull/65498) ([Sergei Trifonov](https://github.com/serxa)). +* Backported in [#65693](https://github.com/ClickHouse/ClickHouse/issues/65693):. [#65686](https://github.com/ClickHouse/ClickHouse/pull/65686) ([Raúl Marín](https://github.com/Algunenano)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Do not remove server constants from GROUP BY key for secondary query."'. [#63297](https://github.com/ClickHouse/ClickHouse/pull/63297) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Introduce bulk loading to StorageEmbeddedRocksDB"'. [#63316](https://github.com/ClickHouse/ClickHouse/pull/63316) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Revert "Do not remove server constants from GROUP BY key for secondary query.""'. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* NO CL ENTRY: 'Revert "Fix index analysis for `DateTime64`"'. [#63525](https://github.com/ClickHouse/ClickHouse/pull/63525) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Update gui.md - Add ch-ui to open-source available tools."'.
[#64064](https://github.com/ClickHouse/ClickHouse/pull/64064) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Prevent conversion to Replicated if zookeeper path already exists"'. [#64214](https://github.com/ClickHouse/ClickHouse/pull/64214) ([Sergei Trifonov](https://github.com/serxa)). +* NO CL ENTRY: 'Revert "Refactoring of Server.h: Isolate server management from other logic"'. [#64425](https://github.com/ClickHouse/ClickHouse/pull/64425) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Remove some unnecessary `UNREACHABLE`s"'. [#64430](https://github.com/ClickHouse/ClickHouse/pull/64430) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "CI: fix build_report selection in case of job reuse"'. [#64516](https://github.com/ClickHouse/ClickHouse/pull/64516) ([Max K.](https://github.com/maxknv)). +* NO CL ENTRY: 'Revert "Revert "CI: fix build_report selection in case of job reuse""'. [#64531](https://github.com/ClickHouse/ClickHouse/pull/64531) ([Max K.](https://github.com/maxknv)). +* NO CL ENTRY: 'Revert "Add `fromReadableSize` function"'. [#64616](https://github.com/ClickHouse/ClickHouse/pull/64616) ([Robert Schulze](https://github.com/rschu1ze)). +* NO CL ENTRY: 'Update CHANGELOG.md'. [#64816](https://github.com/ClickHouse/ClickHouse/pull/64816) ([Paweł Kudzia](https://github.com/pakud)). +* NO CL ENTRY: 'Revert "Reduce lock contention for MergeTree tables (by renaming parts without holding lock)"'. [#64899](https://github.com/ClickHouse/ClickHouse/pull/64899) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Revert "Add dynamic untracked memory limits for more precise memory tracking"'. [#64969](https://github.com/ClickHouse/ClickHouse/pull/64969) ([Sergei Trifonov](https://github.com/serxa)). +* NO CL ENTRY: 'Revert "Fix duplicating Delete events in blob_storage_log"'. 
[#65049](https://github.com/ClickHouse/ClickHouse/pull/65049) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Revert "Fix duplicating Delete events in blob_storage_log""'. [#65053](https://github.com/ClickHouse/ClickHouse/pull/65053) ([vdimir](https://github.com/vdimir)). +* NO CL ENTRY: 'Revert "S3: reduce retires time for queries, increase retries count for backups"'. [#65148](https://github.com/ClickHouse/ClickHouse/pull/65148) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Small fix for 02340_parts_refcnt_mergetree"'. [#65149](https://github.com/ClickHouse/ClickHouse/pull/65149) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Change default s3_throw_on_zero_files_match to true, document that presigned S3 URLs are not supported"'. [#65250](https://github.com/ClickHouse/ClickHouse/pull/65250) ([Max K.](https://github.com/maxknv)). +* NO CL ENTRY: 'Revert "Fix AWS ECS"'. [#65361](https://github.com/ClickHouse/ClickHouse/pull/65361) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Try abort on current thread join. [#42544](https://github.com/ClickHouse/ClickHouse/pull/42544) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* This change was reverted. [#51008](https://github.com/ClickHouse/ClickHouse/pull/51008) ([Michael Kolupaev](https://github.com/al13n321)). +* Analyzer fuzzer 2. [#57098](https://github.com/ClickHouse/ClickHouse/pull/57098) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Analyzer fuzzer 4. [#57101](https://github.com/ClickHouse/ClickHouse/pull/57101) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Check python code with flake8. [#58349](https://github.com/ClickHouse/ClickHouse/pull/58349) ([Azat Khuzhin](https://github.com/azat)). +* Unite s3/hdfs/azure storage implementations into a single class working with IObjectStorage. Same for *Cluster, data lakes and Queue storages. 
[#59767](https://github.com/ClickHouse/ClickHouse/pull/59767) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove http_max_chunk_size setting (too internal). [#60852](https://github.com/ClickHouse/ClickHouse/pull/60852) ([Azat Khuzhin](https://github.com/azat)). +* Fix race in refreshable materialized views causing SELECT to fail sometimes. [#60883](https://github.com/ClickHouse/ClickHouse/pull/60883) ([Michael Kolupaev](https://github.com/al13n321)). +* Refactor KeyCondition and key analysis to improve PartitionPruner and trivial count optimization. This is separated from [#60463](https://github.com/ClickHouse/ClickHouse/issues/60463) . [#61459](https://github.com/ClickHouse/ClickHouse/pull/61459) ([Amos Bird](https://github.com/amosbird)). +* Implement cumulative A Sync status. [#61464](https://github.com/ClickHouse/ClickHouse/pull/61464) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Parallel replicas: table check failover. [#61935](https://github.com/ClickHouse/ClickHouse/pull/61935) ([Igor Nikonov](https://github.com/devcrafter)). +* This change was reverted. [#61973](https://github.com/ClickHouse/ClickHouse/pull/61973) ([Azat Khuzhin](https://github.com/azat)). +* Avoid crashing on column type mismatch in a few dozen places. [#62087](https://github.com/ClickHouse/ClickHouse/pull/62087) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix optimize_if_chain_to_multiif const NULL handling. [#62104](https://github.com/ClickHouse/ClickHouse/pull/62104) ([Michael Kolupaev](https://github.com/al13n321)). +* Use intrusive lists for `ResourceRequest` instead of deque. [#62165](https://github.com/ClickHouse/ClickHouse/pull/62165) ([Sergei Trifonov](https://github.com/serxa)). +* Analyzer: Fix validateAggregates for tables with different aliases. [#62346](https://github.com/ClickHouse/ClickHouse/pull/62346) ([vdimir](https://github.com/vdimir)). +* Improve code and tests of `DROP` of multiple tables. 
[#62359](https://github.com/ClickHouse/ClickHouse/pull/62359) ([zhongyuankai](https://github.com/zhongyuankai)). +* Fix exception message during writing to partitioned s3/hdfs/azure path with globs. [#62423](https://github.com/ClickHouse/ClickHouse/pull/62423) ([Kruglov Pavel](https://github.com/Avogar)). +* Support UBSan on Clang-19 (master). [#62466](https://github.com/ClickHouse/ClickHouse/pull/62466) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Replay ZK logs using keeper-bench. [#62481](https://github.com/ClickHouse/ClickHouse/pull/62481) ([Antonio Andelic](https://github.com/antonio2368)). +* Save the stacktrace of thread waiting on failing AsyncLoader job. [#62719](https://github.com/ClickHouse/ClickHouse/pull/62719) ([Sergei Trifonov](https://github.com/serxa)). +* group_by_use_nulls strikes back. [#62922](https://github.com/ClickHouse/ClickHouse/pull/62922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Analyzer: prefer column name to alias from array join. [#62995](https://github.com/ClickHouse/ClickHouse/pull/62995) ([vdimir](https://github.com/vdimir)). +* CI: try separate the workflows file for GitHub's Merge Queue. [#63123](https://github.com/ClickHouse/ClickHouse/pull/63123) ([Max K.](https://github.com/maxknv)). +* Try to fix coverage tests. [#63130](https://github.com/ClickHouse/ClickHouse/pull/63130) ([Raúl Marín](https://github.com/Algunenano)). +* Fix azure backup flaky test. [#63158](https://github.com/ClickHouse/ClickHouse/pull/63158) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Merging [#60920](https://github.com/ClickHouse/ClickHouse/issues/60920). [#63159](https://github.com/ClickHouse/ClickHouse/pull/63159) ([vdimir](https://github.com/vdimir)). +* QueryAnalysisPass improve QUALIFY validation. [#63162](https://github.com/ClickHouse/ClickHouse/pull/63162) ([Maksim Kita](https://github.com/kitaisreal)). +* Add numpy tests for different endianness. 
[#63189](https://github.com/ClickHouse/ClickHouse/pull/63189) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Clean the `_work` directory between runner's launches. Fallback to auto-update actions runner if it fails to start. Make the `init-network.sh` sourceable and executable. [#63195](https://github.com/ClickHouse/ClickHouse/pull/63195) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add ability to run Azure tests in PR with label. [#63196](https://github.com/ClickHouse/ClickHouse/pull/63196) ([alesapin](https://github.com/alesapin)). +* Fix possible endless loop while reading from azure. [#63197](https://github.com/ClickHouse/ClickHouse/pull/63197) ([Anton Popov](https://github.com/CurtizJ)). +* Add information about materialized view security bug fix into the changelog. [#63204](https://github.com/ClickHouse/ClickHouse/pull/63204) ([pufit](https://github.com/pufit)). +* Disable one test from 02994_sanity_check_settings. [#63208](https://github.com/ClickHouse/ClickHouse/pull/63208) ([Raúl Marín](https://github.com/Algunenano)). +* Enable custom parquet encoder by default, attempt 2. [#63210](https://github.com/ClickHouse/ClickHouse/pull/63210) ([Michael Kolupaev](https://github.com/al13n321)). +* Update version after release. [#63215](https://github.com/ClickHouse/ClickHouse/pull/63215) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v24.4.1.2088-stable. [#63217](https://github.com/ClickHouse/ClickHouse/pull/63217) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v24.3.3.102-lts. [#63226](https://github.com/ClickHouse/ClickHouse/pull/63226) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v24.2.3.70-stable. [#63227](https://github.com/ClickHouse/ClickHouse/pull/63227) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* Return back [#61551](https://github.com/ClickHouse/ClickHouse/issues/61551) (More optimal loading of marks). [#63233](https://github.com/ClickHouse/ClickHouse/pull/63233) ([Anton Popov](https://github.com/CurtizJ)). +* Hide CI options under a spoiler. [#63237](https://github.com/ClickHouse/ClickHouse/pull/63237) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Add azure run with msan. [#63238](https://github.com/ClickHouse/ClickHouse/pull/63238) ([alesapin](https://github.com/alesapin)). +* The syntax for this command is now the following: `TRUNCATE ALL TABLES FROM [IF EXISTS] <database_name>`. [#63241](https://github.com/ClickHouse/ClickHouse/pull/63241) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Minor follow-up to a renaming PR. [#63260](https://github.com/ClickHouse/ClickHouse/pull/63260) ([Robert Schulze](https://github.com/rschu1ze)). +* Follow-up for [#62613](https://github.com/ClickHouse/ClickHouse/issues/62613): adding back checks similar to these: https://github.com/ClickHouse/ClickHouse/pull/62613/files#diff-70859078da57ecdfc66d26f732c0d7718d269e82bdc80e62b39f5ffeab36c05bL99 https://github.com/ClickHouse/ClickHouse/pull/62613/files#diff-70859078da57ecdfc66d26f732c0d7718d269e82bdc80e62b39f5ffeab36c05bL144-L149. [#63274](https://github.com/ClickHouse/ClickHouse/pull/63274) ([Alexander Gololobov](https://github.com/davenger)). +* This setting was added in 24.5, not 24.4. [#63278](https://github.com/ClickHouse/ClickHouse/pull/63278) ([Raúl Marín](https://github.com/Algunenano)). +* Improve cloud backport script. [#63282](https://github.com/ClickHouse/ClickHouse/pull/63282) ([Raúl Marín](https://github.com/Algunenano)). +* Update version_date.tsv and changelogs after v23.8.14.6-lts. [#63285](https://github.com/ClickHouse/ClickHouse/pull/63285) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix azure flaky test. [#63286](https://github.com/ClickHouse/ClickHouse/pull/63286) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Fix deadlock in `CacheDictionaryUpdateQueue` in case of exception in constructor. [#63287](https://github.com/ClickHouse/ClickHouse/pull/63287) ([Nikita Taranov](https://github.com/nickitat)). +* DiskApp: fix 'list --recursive /' and crash on invalid arguments. [#63296](https://github.com/ClickHouse/ClickHouse/pull/63296) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix terminate because of unhandled exception in `MergeTreeDeduplicationLog::shutdown`. [#63298](https://github.com/ClickHouse/ClickHouse/pull/63298) ([Nikita Taranov](https://github.com/nickitat)). +* Move s3_plain_rewritable unit test to shell. [#63317](https://github.com/ClickHouse/ClickHouse/pull/63317) ([Julia Kartseva](https://github.com/jkartseva)). +* Add tests for [#63264](https://github.com/ClickHouse/ClickHouse/issues/63264). [#63321](https://github.com/ClickHouse/ClickHouse/pull/63321) ([Raúl Marín](https://github.com/Algunenano)). +* Try fix segfault in `MergeTreeReadPoolBase::createTask`. [#63323](https://github.com/ClickHouse/ClickHouse/pull/63323) ([Antonio Andelic](https://github.com/antonio2368)). +* Reduce time-to-insert profiling data in case of logs cluster issues. [#63325](https://github.com/ClickHouse/ClickHouse/pull/63325) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update README.md. [#63326](https://github.com/ClickHouse/ClickHouse/pull/63326) ([Tyler Hannan](https://github.com/tylerhannan)). +* This should fix failures with error like `Permission denied ["/var/lib/clickhouse/disks/s3/store/364/3643ff83-0996-4a4a-a90b-a96e66a10c74"]` when table dir was chmod-ed by DatabaseCatalog. [#63330](https://github.com/ClickHouse/ClickHouse/pull/63330) ([Alexander Gololobov](https://github.com/davenger)). 
+* Use `/commit/` to have the URLs in [reports](https://play.clickhouse.com/play?user=play#c2VsZWN0IGRpc3RpbmN0IGNvbW1pdF91cmwgZnJvbSBjaGVja3Mgd2hlcmUgY2hlY2tfc3RhcnRfdGltZSA+PSBub3coKSAtIGludGVydmFsIDEgbW9udGggYW5kIHB1bGxfcmVxdWVzdF9udW1iZXI9NjA1MzI=) like https://github.com/ClickHouse/ClickHouse/commit/44f8bc5308b53797bec8cccc3bd29fab8a00235d and not like https://github.com/ClickHouse/ClickHouse/commits/44f8bc5308b53797bec8cccc3bd29fab8a00235d. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add test for [#56287](https://github.com/ClickHouse/ClickHouse/issues/56287). [#63340](https://github.com/ClickHouse/ClickHouse/pull/63340) ([Raúl Marín](https://github.com/Algunenano)). +* Update README.md. [#63350](https://github.com/ClickHouse/ClickHouse/pull/63350) ([Tyler Hannan](https://github.com/tylerhannan)). +* Add test for [#48049](https://github.com/ClickHouse/ClickHouse/issues/48049). [#63351](https://github.com/ClickHouse/ClickHouse/pull/63351) ([Raúl Marín](https://github.com/Algunenano)). +* Add option `query_id_prefix` to `clickhouse-benchmark`. [#63352](https://github.com/ClickHouse/ClickHouse/pull/63352) ([Anton Popov](https://github.com/CurtizJ)). +* New version is fantatish (at least with Ubuntu 22.04.4 LTS): ``` azurite --version /usr/local/lib/node_modules/azurite/dist/src/common/persistence/MemoryExtentStore.js:53 return this._chunks.get(categoryName)?.chunks.get(id); ^ ```. [#63354](https://github.com/ClickHouse/ClickHouse/pull/63354) ([alesapin](https://github.com/alesapin)). +* Randomize setting `enable_block_offset_column` in stress tests. [#63355](https://github.com/ClickHouse/ClickHouse/pull/63355) ([Anton Popov](https://github.com/CurtizJ)). +* Fix AST parsing of invalid type names. [#63357](https://github.com/ClickHouse/ClickHouse/pull/63357) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix some 00002_log_and_exception_messages_formatting flakiness.
[#63358](https://github.com/ClickHouse/ClickHouse/pull/63358) ([Michael Kolupaev](https://github.com/al13n321)). +* Add tags for the test 03000_traverse_shadow_system_data_paths.sql to make it stable. [#63366](https://github.com/ClickHouse/ClickHouse/pull/63366) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Add a test for [#55655](https://github.com/ClickHouse/ClickHouse/issues/55655). [#63380](https://github.com/ClickHouse/ClickHouse/pull/63380) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data race in `reportBrokenPart`. [#63396](https://github.com/ClickHouse/ClickHouse/pull/63396) ([Antonio Andelic](https://github.com/antonio2368)). +* Workaround for `oklch()` inside canvas bug for firefox. [#63404](https://github.com/ClickHouse/ClickHouse/pull/63404) ([Sergei Trifonov](https://github.com/serxa)). +* Add test for issue [#47862](https://github.com/ClickHouse/ClickHouse/issues/47862). [#63424](https://github.com/ClickHouse/ClickHouse/pull/63424) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix parsing of `CREATE INDEX` query. [#63425](https://github.com/ClickHouse/ClickHouse/pull/63425) ([Anton Popov](https://github.com/CurtizJ)). +* We are using Shared Catalog in the CI Logs cluster. [#63442](https://github.com/ClickHouse/ClickHouse/pull/63442) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix collection of coverage data in the CI Logs cluster. [#63453](https://github.com/ClickHouse/ClickHouse/pull/63453) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test for rocksdb bulk sink. [#63457](https://github.com/ClickHouse/ClickHouse/pull/63457) ([Duc Canh Le](https://github.com/canhld94)). +* Extra constraints for stress and fuzzer tests. [#63470](https://github.com/ClickHouse/ClickHouse/pull/63470) ([Raúl Marín](https://github.com/Algunenano)). +* io_uring: refactor get reader from context. 
[#63475](https://github.com/ClickHouse/ClickHouse/pull/63475) ([Tomer Shafir](https://github.com/tomershafir)). +* Analyzer setting max_streams_to_max_threads_ratio overflow fix. [#63478](https://github.com/ClickHouse/ClickHouse/pull/63478) ([Maksim Kita](https://github.com/kitaisreal)). +* Provides setting `output_format_pretty_preserve_border_for_multiline_string` which allows to render multiline strings in pretty format better. The default value for this setting is true. [#63479](https://github.com/ClickHouse/ClickHouse/pull/63479) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix logical error when reloading config with customly created web disk broken after [#56367](https://github.com/ClickHouse/ClickHouse/issues/56367). [#63484](https://github.com/ClickHouse/ClickHouse/pull/63484) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add test for [#49307](https://github.com/ClickHouse/ClickHouse/issues/49307). [#63486](https://github.com/ClickHouse/ClickHouse/pull/63486) ([Anton Popov](https://github.com/CurtizJ)). +* Remove leftovers of GCC support in cmake rules. [#63488](https://github.com/ClickHouse/ClickHouse/pull/63488) ([Azat Khuzhin](https://github.com/azat)). +* Fix ProfileEventTimeIncrement code. [#63489](https://github.com/ClickHouse/ClickHouse/pull/63489) ([Azat Khuzhin](https://github.com/azat)). +* MergeTreePrefetchedReadPool: Print parent name when logging projection parts. [#63522](https://github.com/ClickHouse/ClickHouse/pull/63522) ([Raúl Marín](https://github.com/Algunenano)). +* Correctly stop `asyncCopy` tasks in all cases. [#63523](https://github.com/ClickHouse/ClickHouse/pull/63523) ([Antonio Andelic](https://github.com/antonio2368)). +* Almost everything should work on AArch64 (Part of [#58061](https://github.com/ClickHouse/ClickHouse/issues/58061)). [#63527](https://github.com/ClickHouse/ClickHouse/pull/63527) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update randomization of `old_parts_lifetime`. 
[#63530](https://github.com/ClickHouse/ClickHouse/pull/63530) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update 02240_system_filesystem_cache_table.sh. [#63531](https://github.com/ClickHouse/ClickHouse/pull/63531) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix data race in `DistributedSink`. [#63538](https://github.com/ClickHouse/ClickHouse/pull/63538) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix azure tests run on master. [#63540](https://github.com/ClickHouse/ClickHouse/pull/63540) ([alesapin](https://github.com/alesapin)). +* The commit 2b8254f987a65d5c21d74fe67b4ee9757970466e was not synced into the cloud because it was falsely marked as a success by `upstream_pr.head.sha`. Here we'll try our best to find a proper commit, and won't make anything if we can't. [#63543](https://github.com/ClickHouse/ClickHouse/pull/63543) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add `no-s3-storage` tag to local_plain_rewritable ut. [#63546](https://github.com/ClickHouse/ClickHouse/pull/63546) ([Julia Kartseva](https://github.com/jkartseva)). +* Add `jwcrypto` to integration tests runner. [#63551](https://github.com/ClickHouse/ClickHouse/pull/63551) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Go back to upstream lz4. [#63574](https://github.com/ClickHouse/ClickHouse/pull/63574) ([Raúl Marín](https://github.com/Algunenano)). +* Fix logical error in ColumnTuple::tryInsert(). [#63583](https://github.com/ClickHouse/ClickHouse/pull/63583) ([Michael Kolupaev](https://github.com/al13n321)). +* harmonize sumMap error messages on ILLEGAL_TYPE_OF_ARGUMENT. [#63619](https://github.com/ClickHouse/ClickHouse/pull/63619) ([Yohann Jardin](https://github.com/yohannj)). +* Refactor data part writer to remove dependencies on MergeTreeData and DataPart. [#63620](https://github.com/ClickHouse/ClickHouse/pull/63620) ([Alexander Gololobov](https://github.com/davenger)). +* Update README.md. 
[#63631](https://github.com/ClickHouse/ClickHouse/pull/63631) ([Tyler Hannan](https://github.com/tylerhannan)). +* Ignore the global profiler if system.trace_log is not enabled, and really disable it for the keeper standalone build. [#63632](https://github.com/ClickHouse/ClickHouse/pull/63632) ([Azat Khuzhin](https://github.com/azat)). +* Fixes for 00002_log_and_exception_messages_formatting. [#63634](https://github.com/ClickHouse/ClickHouse/pull/63634) ([Azat Khuzhin](https://github.com/azat)). +* Fix 02362_part_log_merge_algorithm flaky test. [#63635](https://github.com/ClickHouse/ClickHouse/pull/63635) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Fix tests flakiness due to long SYSTEM FLUSH LOGS (explicitly specify old_parts_lifetime). [#63639](https://github.com/ClickHouse/ClickHouse/pull/63639) ([Azat Khuzhin](https://github.com/azat)). +* Update clickhouse-test help section. [#63663](https://github.com/ClickHouse/ClickHouse/pull/63663) ([Ali](https://github.com/xogoodnow)). +* Fix bad test `02950_part_log_bytes_uncompressed`. [#63672](https://github.com/ClickHouse/ClickHouse/pull/63672) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove leftovers of `optimize_monotonous_functions_in_order_by`. [#63674](https://github.com/ClickHouse/ClickHouse/pull/63674) ([Nikita Taranov](https://github.com/nickitat)). +* tests: attempt to fix 02340_parts_refcnt_mergetree flakiness. [#63684](https://github.com/ClickHouse/ClickHouse/pull/63684) ([Azat Khuzhin](https://github.com/azat)). +* Parallel replicas: simple cleanup. [#63685](https://github.com/ClickHouse/ClickHouse/pull/63685) ([Igor Nikonov](https://github.com/devcrafter)). +* Cancel S3 reads properly when parallel reads are used. [#63687](https://github.com/ClickHouse/ClickHouse/pull/63687) ([Antonio Andelic](https://github.com/antonio2368)). +* Explaining insertion order of the Map datatype.
[#63690](https://github.com/ClickHouse/ClickHouse/pull/63690) ([Mark Needham](https://github.com/mneedham)). +* selectRangesToRead() simple cleanup. [#63692](https://github.com/ClickHouse/ClickHouse/pull/63692) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix fuzzed analyzer_join_with_constant query. [#63702](https://github.com/ClickHouse/ClickHouse/pull/63702) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add missing explicit instantiations of ColumnUnique. [#63718](https://github.com/ClickHouse/ClickHouse/pull/63718) ([Raúl Marín](https://github.com/Algunenano)). +* Better asserts in ColumnString.h. [#63719](https://github.com/ClickHouse/ClickHouse/pull/63719) ([Raúl Marín](https://github.com/Algunenano)). +* Try to fix flaky s3 tests test_seekable_formats and test_seekable_formats_url. [#63720](https://github.com/ClickHouse/ClickHouse/pull/63720) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't randomize some settings in 02941_variant_type_* tests to avoid timeouts. [#63721](https://github.com/ClickHouse/ClickHouse/pull/63721) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix flaky 03145_non_loaded_projection_backup.sh. [#63728](https://github.com/ClickHouse/ClickHouse/pull/63728) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Userspace page cache: don't collect stats if cache is unused. [#63730](https://github.com/ClickHouse/ClickHouse/pull/63730) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix insignificant UBSAN error in QueryAnalyzer::replaceNodesWithPositionalArguments(). [#63734](https://github.com/ClickHouse/ClickHouse/pull/63734) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix a bug in resolving matcher inside lambda inside ARRAY JOIN. [#63744](https://github.com/ClickHouse/ClickHouse/pull/63744) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Self explanatory. [#63754](https://github.com/ClickHouse/ClickHouse/pull/63754) ([Arthur Passos](https://github.com/arthurpassos)). 
+* Do not hide disk name. [#63756](https://github.com/ClickHouse/ClickHouse/pull/63756) ([Kseniia Sumarokova](https://github.com/kssenii)). +* CI: remove Cancel and Debug workflows as redundant. [#63757](https://github.com/ClickHouse/ClickHouse/pull/63757) ([Max K.](https://github.com/maxknv)). +* Security Policy: Add notification process. [#63773](https://github.com/ClickHouse/ClickHouse/pull/63773) ([Leticia Webb](https://github.com/leticiawebb)). +* Fix typo. [#63774](https://github.com/ClickHouse/ClickHouse/pull/63774) ([Anton Popov](https://github.com/CurtizJ)). +* Fix fuzzer when only explicit faults are used. [#63775](https://github.com/ClickHouse/ClickHouse/pull/63775) ([Raúl Marín](https://github.com/Algunenano)). +* Settings typo. [#63782](https://github.com/ClickHouse/ClickHouse/pull/63782) ([Rory Crispin](https://github.com/RoryCrispin)). +* Ref. [#63479](https://github.com/ClickHouse/ClickHouse/issues/63479). [#63783](https://github.com/ClickHouse/ClickHouse/pull/63783) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix test_odbc_interaction from aarch64 [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63787](https://github.com/ClickHouse/ClickHouse/pull/63787) ([alesapin](https://github.com/alesapin)). +* Fix test `test_catboost_evaluate` for aarch64. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63789](https://github.com/ClickHouse/ClickHouse/pull/63789) ([alesapin](https://github.com/alesapin)). +* Rewrite plan for parallel replicas in Planner. [#63796](https://github.com/ClickHouse/ClickHouse/pull/63796) ([Igor Nikonov](https://github.com/devcrafter)). +* Follow-up for the `binary_symbols` table in CI. [#63802](https://github.com/ClickHouse/ClickHouse/pull/63802) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support INSERT with VALUES in the ANTLR syntax file. [#63811](https://github.com/ClickHouse/ClickHouse/pull/63811) ([GG Bond](https://github.com/zzyReal666)). 
+* Fix race in `ReplicatedMergeTreeLogEntryData`. [#63816](https://github.com/ClickHouse/ClickHouse/pull/63816) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow allocation during job destructor in `ThreadPool`. [#63829](https://github.com/ClickHouse/ClickHouse/pull/63829) ([Antonio Andelic](https://github.com/antonio2368)). +* Remove HDFS from disks config for one integration test for arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63832](https://github.com/ClickHouse/ClickHouse/pull/63832) ([alesapin](https://github.com/alesapin)). +* io_uring: add basic io_uring clickhouse perf test. [#63835](https://github.com/ClickHouse/ClickHouse/pull/63835) ([Tomer Shafir](https://github.com/tomershafir)). +* Bump version for old image in test_short_strings_aggregation to make it work on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63836](https://github.com/ClickHouse/ClickHouse/pull/63836) ([alesapin](https://github.com/alesapin)). +* fix typo. [#63838](https://github.com/ClickHouse/ClickHouse/pull/63838) ([Alexander Gololobov](https://github.com/davenger)). +* Disable test `test_non_default_compression/test.py::test_preconfigured_deflateqpl_codec` on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63839](https://github.com/ClickHouse/ClickHouse/pull/63839) ([alesapin](https://github.com/alesapin)). +* This PR was reverted. [#63857](https://github.com/ClickHouse/ClickHouse/pull/63857) ([Sema Checherinda](https://github.com/CheSema)). +* Remove unnecessary logging statements in MergeJoinTransform.cpp. [#63860](https://github.com/ClickHouse/ClickHouse/pull/63860) ([vdimir](https://github.com/vdimir)). +* Temporary disables 3 integration tcs on arm until https://github.com/clickhouse/clickhouse/issues/63855 is resolved. [#63867](https://github.com/ClickHouse/ClickHouse/pull/63867) ([Max K.](https://github.com/maxknv)). 
+* Fix some settings values in 02455_one_row_from_csv_memory_usage test to make it less flaky. [#63874](https://github.com/ClickHouse/ClickHouse/pull/63874) ([Kruglov Pavel](https://github.com/Avogar)). +* Randomise `allow_experimental_parallel_reading_from_replicas` in stress tests. [#63899](https://github.com/ClickHouse/ClickHouse/pull/63899) ([Nikita Taranov](https://github.com/nickitat)). +* Fix logs test for binary data by converting it to a valid UTF8 string. [#63909](https://github.com/ClickHouse/ClickHouse/pull/63909) ([Alexey Katsman](https://github.com/alexkats)). +* More sanity checks for parallel replicas. [#63910](https://github.com/ClickHouse/ClickHouse/pull/63910) ([Nikita Taranov](https://github.com/nickitat)). +* Include checks like `Stateless tests (asan, distributed cache, meta storage in keeper, s3 storage) [2/3]` in `Mergeable Check` and `A Sync`. [#63945](https://github.com/ClickHouse/ClickHouse/pull/63945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Insignificant libunwind build fixes. [#63946](https://github.com/ClickHouse/ClickHouse/pull/63946) ([Azat Khuzhin](https://github.com/azat)). +* Revert multiline pretty changes due to performance problems. [#63947](https://github.com/ClickHouse/ClickHouse/pull/63947) ([Raúl Marín](https://github.com/Algunenano)). +* Some usability improvements for c++expr script. [#63948](https://github.com/ClickHouse/ClickHouse/pull/63948) ([Azat Khuzhin](https://github.com/azat)). +* Fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)). +* CI: aarch64: disable arm integration tests with kerberized Kafka. [#63961](https://github.com/ClickHouse/ClickHouse/pull/63961) ([Max K.](https://github.com/maxknv)).
+* Make events like [timeouts](https://play.clickhouse.com/play?user=play#U0VMRUNUICogRlJPTSBjaGVja3MgV0hFUkUgdGVzdF9uYW1lID09ICdDaGVjayB0aW1lb3V0IGV4cGlyZWQnIEFORCBjaGVja19zdGFydF90aW1lIEJFVFdFRU4gdG9EYXRlKCcyMDI0LTA1LTEwJykgQU5EIHRvRGF0ZSgnMjAyNC0wNS0xNScp) visible in CI DB. [#63982](https://github.com/ClickHouse/ClickHouse/pull/63982) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Slightly better setting `force_optimize_projection_name`. [#63997](https://github.com/ClickHouse/ClickHouse/pull/63997) ([Anton Popov](https://github.com/CurtizJ)). +* chore(ci-workers): remove reusable from tailscale key. [#63999](https://github.com/ClickHouse/ClickHouse/pull/63999) ([Gabriel Martinez](https://github.com/GMartinez-Sisti)). +* Better script to collect symbols statistics. [#64013](https://github.com/ClickHouse/ClickHouse/pull/64013) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix a typo in Analyzer. [#64022](https://github.com/ClickHouse/ClickHouse/pull/64022) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix libbcrypt for FreeBSD build. [#64023](https://github.com/ClickHouse/ClickHouse/pull/64023) ([Azat Khuzhin](https://github.com/azat)). +* Remove some unnecessary `UNREACHABLE`s. [#64035](https://github.com/ClickHouse/ClickHouse/pull/64035) ([Robert Schulze](https://github.com/rschu1ze)). +* Add `ClickHouseVersion.copy` method. Create a branch release in advance without spinning out the release to increase the stability. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix searching for libclang_rt.builtins.*.a on FreeBSD. [#64051](https://github.com/ClickHouse/ClickHouse/pull/64051) ([Azat Khuzhin](https://github.com/azat)). +* The mime type is not 100% reliable for Python and shell scripts without shebangs; add a check for file extension. [#64062](https://github.com/ClickHouse/ClickHouse/pull/64062) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Fix waiting for mutations with retriable errors. [#64063](https://github.com/ClickHouse/ClickHouse/pull/64063) ([Alexander Tokmakov](https://github.com/tavplubix)). +* harmonize h3PointDist* error messages. [#64080](https://github.com/ClickHouse/ClickHouse/pull/64080) ([Yohann Jardin](https://github.com/yohannj)). +* This log message is better in Trace. [#64081](https://github.com/ClickHouse/ClickHouse/pull/64081) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Prevent stack overflow in Fuzzer and Stress test. [#64082](https://github.com/ClickHouse/ClickHouse/pull/64082) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* tests: fix expected error for 03036_reading_s3_archives (fixes CI). [#64089](https://github.com/ClickHouse/ClickHouse/pull/64089) ([Azat Khuzhin](https://github.com/azat)). +* Fix sanitizers. [#64090](https://github.com/ClickHouse/ClickHouse/pull/64090) ([Azat Khuzhin](https://github.com/azat)). +* Update llvm/clang to 18.1.6. [#64091](https://github.com/ClickHouse/ClickHouse/pull/64091) ([Azat Khuzhin](https://github.com/azat)). +* Set green Mergeable Check status only after all required checks are passed with success - All non-required checks are started at stage Test_3 when all required checks are passed in Test_1/2. [#64093](https://github.com/ClickHouse/ClickHouse/pull/64093) ([Max K.](https://github.com/maxknv)). +* Move `isAllASCII` from UTFHelper to StringUtils. [#64108](https://github.com/ClickHouse/ClickHouse/pull/64108) ([Robert Schulze](https://github.com/rschu1ze)). +* Throw out some `inline`s. [#64110](https://github.com/ClickHouse/ClickHouse/pull/64110) ([Robert Schulze](https://github.com/rschu1ze)). +* Clean up .clang-tidy after transition to Clang 18. [#64111](https://github.com/ClickHouse/ClickHouse/pull/64111) ([Robert Schulze](https://github.com/rschu1ze)). +* Ignore exception when checking for cgroupsv2. 
[#64118](https://github.com/ClickHouse/ClickHouse/pull/64118) ([Robert Schulze](https://github.com/rschu1ze)). +* Add retries in git submodule update. [#64125](https://github.com/ClickHouse/ClickHouse/pull/64125) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* See https://s3.amazonaws.com/clickhouse-test-reports/63946/86cf1e13d866333b8a511badd7f2fe186d810646/ast_fuzzer__ubsan_.html. [#64127](https://github.com/ClickHouse/ClickHouse/pull/64127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Refactoring of Server.h: Isolate server management from other logic. [#64132](https://github.com/ClickHouse/ClickHouse/pull/64132) ([TTPO100AJIEX](https://github.com/TTPO100AJIEX)). +* Syncing code. [#64135](https://github.com/ClickHouse/ClickHouse/pull/64135) ([Antonio Andelic](https://github.com/antonio2368)). +* Loosen build resource limits for unusual architectures. [#64152](https://github.com/ClickHouse/ClickHouse/pull/64152) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix clang tidy. [#64179](https://github.com/ClickHouse/ClickHouse/pull/64179) ([Han Fei](https://github.com/hanfei1991)). +* Fix: 02124_insert_deduplication_token_multiple_blocks_replica. [#64181](https://github.com/ClickHouse/ClickHouse/pull/64181) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix global query profiler. [#64187](https://github.com/ClickHouse/ClickHouse/pull/64187) ([Azat Khuzhin](https://github.com/azat)). +* CI: cancel running PR wf after adding to MQ. [#64188](https://github.com/ClickHouse/ClickHouse/pull/64188) ([Max K.](https://github.com/maxknv)). +* Add profile events for number of rows read during/after prewhere. [#64198](https://github.com/ClickHouse/ClickHouse/pull/64198) ([Nikita Taranov](https://github.com/nickitat)). +* Add debug logging to EmbeddedRocksDBBulkSink. [#64203](https://github.com/ClickHouse/ClickHouse/pull/64203) ([vdimir](https://github.com/vdimir)).
+* Fix special builds (due to excessive resource usage - memory/CPU). [#64204](https://github.com/ClickHouse/ClickHouse/pull/64204) ([Azat Khuzhin](https://github.com/azat)). +* Update InterpreterCreateQuery.cpp. [#64207](https://github.com/ClickHouse/ClickHouse/pull/64207) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove unused storage_snapshot field from MergeTreeSelectProcessor. [#64217](https://github.com/ClickHouse/ClickHouse/pull/64217) ([Alexander Gololobov](https://github.com/davenger)). +* Add test for [#37090](https://github.com/ClickHouse/ClickHouse/issues/37090). [#64220](https://github.com/ClickHouse/ClickHouse/pull/64220) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Small cli tool. [#64227](https://github.com/ClickHouse/ClickHouse/pull/64227) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Make `settings_changes_history` const. [#64230](https://github.com/ClickHouse/ClickHouse/pull/64230) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* test for [#45804](https://github.com/ClickHouse/ClickHouse/issues/45804). [#64245](https://github.com/ClickHouse/ClickHouse/pull/64245) ([Denny Crane](https://github.com/den-crane)). +* Update version after release. [#64283](https://github.com/ClickHouse/ClickHouse/pull/64283) ([Raúl Marín](https://github.com/Algunenano)). +* Followup for [#63691](https://github.com/ClickHouse/ClickHouse/issues/63691). [#64285](https://github.com/ClickHouse/ClickHouse/pull/64285) ([vdimir](https://github.com/vdimir)). +* CI: dependency fix for changelog.py. [#64293](https://github.com/ClickHouse/ClickHouse/pull/64293) ([Max K.](https://github.com/maxknv)). +* Print query in explain plan with parallel replicas. [#64298](https://github.com/ClickHouse/ClickHouse/pull/64298) ([vdimir](https://github.com/vdimir)). +* CI: Cancel sync wf on new push. [#64299](https://github.com/ClickHouse/ClickHouse/pull/64299) ([Max K.](https://github.com/maxknv)). 
+* CI: master workflow with folded jobs. [#64340](https://github.com/ClickHouse/ClickHouse/pull/64340) ([Max K.](https://github.com/maxknv)). +* CI: Sync, Merge check, CI gh's statuses fixes. [#64348](https://github.com/ClickHouse/ClickHouse/pull/64348) ([Max K.](https://github.com/maxknv)). +* Enable 02494_query_cache_nested_query_bug for Analyzer. [#64357](https://github.com/ClickHouse/ClickHouse/pull/64357) ([Robert Schulze](https://github.com/rschu1ze)). +* Rename allow_deprecated_functions to allow_deprecated_error_prone_window_functions. [#64358](https://github.com/ClickHouse/ClickHouse/pull/64358) ([Raúl Marín](https://github.com/Algunenano)). +* Change input_format_parquet_use_native_reader to 24.6. [#64359](https://github.com/ClickHouse/ClickHouse/pull/64359) ([Raúl Marín](https://github.com/Algunenano)). +* Update description for settings `cross_join_min_rows_to_compress` and `cross_join_min_bytes_to_compress`. [#64360](https://github.com/ClickHouse/ClickHouse/pull/64360) ([Nikita Fomichev](https://github.com/fm4v)). +* Changed the unreleased setting `aggregate_function_group_array_has_limit_size` to `aggregate_function_group_array_action_when_limit_is_reached`. [#64362](https://github.com/ClickHouse/ClickHouse/pull/64362) ([Raúl Marín](https://github.com/Algunenano)). +* Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts. [#64363](https://github.com/ClickHouse/ClickHouse/pull/64363) ([Kruglov Pavel](https://github.com/Avogar)). +* Try to fix GWPAsan. [#64365](https://github.com/ClickHouse/ClickHouse/pull/64365) ([Antonio Andelic](https://github.com/antonio2368)). +* CI: add secrets to reusable stage wf yml. [#64366](https://github.com/ClickHouse/ClickHouse/pull/64366) ([Max K.](https://github.com/maxknv)). +* Do not run tests tagged 'no-s3-storage-with-slow-build' with ASan. [#64367](https://github.com/ClickHouse/ClickHouse/pull/64367) ([vdimir](https://github.com/vdimir)). +* This change was reverted. 
[#64386](https://github.com/ClickHouse/ClickHouse/pull/64386) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Update s3queue.md. [#64389](https://github.com/ClickHouse/ClickHouse/pull/64389) ([Kseniia Sumarokova](https://github.com/kssenii)). +* test for [#64211](https://github.com/ClickHouse/ClickHouse/issues/64211). [#64390](https://github.com/ClickHouse/ClickHouse/pull/64390) ([Denny Crane](https://github.com/den-crane)). +* Follow-up to [#59767](https://github.com/ClickHouse/ClickHouse/issues/59767). [#64398](https://github.com/ClickHouse/ClickHouse/pull/64398) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove wrong comment. [#64403](https://github.com/ClickHouse/ClickHouse/pull/64403) ([Sergei Trifonov](https://github.com/serxa)). +* Follow up to [#59767](https://github.com/ClickHouse/ClickHouse/issues/59767). [#64404](https://github.com/ClickHouse/ClickHouse/pull/64404) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Refactor s3 settings (move settings parsing into single place). [#64412](https://github.com/ClickHouse/ClickHouse/pull/64412) ([Kseniia Sumarokova](https://github.com/kssenii)). +* This PR was reverted. [#64423](https://github.com/ClickHouse/ClickHouse/pull/64423) ([Sergei Trifonov](https://github.com/serxa)). +* Fix test after [#64404](https://github.com/ClickHouse/ClickHouse/issues/64404). [#64432](https://github.com/ClickHouse/ClickHouse/pull/64432) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Faster TestKeeper shutdown. [#64433](https://github.com/ClickHouse/ClickHouse/pull/64433) ([Alexander Gololobov](https://github.com/davenger)). +* Remove some logging. [#64434](https://github.com/ClickHouse/ClickHouse/pull/64434) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Revert "Revert "Remove some unnecessary UNREACHABLEs"". [#64435](https://github.com/ClickHouse/ClickHouse/pull/64435) ([Robert Schulze](https://github.com/rschu1ze)). +* Clean settings in 02943_variant_read_subcolumns test. 
[#64437](https://github.com/ClickHouse/ClickHouse/pull/64437) ([Kruglov Pavel](https://github.com/Avogar)). +* Add a comment after [#64226](https://github.com/ClickHouse/ClickHouse/issues/64226). [#64449](https://github.com/ClickHouse/ClickHouse/pull/64449) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* CI: fix build_report selection in case of job reuse. [#64459](https://github.com/ClickHouse/ClickHouse/pull/64459) ([Max K.](https://github.com/maxknv)). +* Add Critical bugfix category in PR template. [#64480](https://github.com/ClickHouse/ClickHouse/pull/64480) ([Max K.](https://github.com/maxknv)). +* Remove `generateSnowflakeIDThreadMonotonic`. [#64499](https://github.com/ClickHouse/ClickHouse/pull/64499) ([Robert Schulze](https://github.com/rschu1ze)). +* Move analyzer attempt 2. [#64500](https://github.com/ClickHouse/ClickHouse/pull/64500) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Sync some code back from internal to public repository. [#64502](https://github.com/ClickHouse/ClickHouse/pull/64502) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove `generateUUIDv7(NonMonotonic|ThreadMonotonic)` functions. [#64506](https://github.com/ClickHouse/ClickHouse/pull/64506) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix bash completion for settings. [#64521](https://github.com/ClickHouse/ClickHouse/pull/64521) ([Azat Khuzhin](https://github.com/azat)). +* Use max_read_buffer_size for file descriptors as well in file(). [#64532](https://github.com/ClickHouse/ClickHouse/pull/64532) ([Azat Khuzhin](https://github.com/azat)). +* Temporarily disable `enable_vertical_final` setting by default. This feature should not be used in older releases because it [might crash](https://github.com/ClickHouse/ClickHouse/issues/64543), but it's already fixed in 24.6 where this setting change has been reverted and `enable_vertical_final` is again enabled by default. 
[#64544](https://github.com/ClickHouse/ClickHouse/pull/64544) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Removed excessive calls to `flush logs` and disabled under sanitizers. [#64550](https://github.com/ClickHouse/ClickHouse/pull/64550) ([Nikita Taranov](https://github.com/nickitat)). +* Sync code moved in private repo back to public repo. [#64551](https://github.com/ClickHouse/ClickHouse/pull/64551) ([Robert Schulze](https://github.com/rschu1ze)). +* Add support for custom type to ASTLiteral, or else the type may be lost when parsing the AST. E.g. set an ASTLiteral to DateTime32 with value 19870, then it will be parsed to Int16. [#64562](https://github.com/ClickHouse/ClickHouse/pull/64562) ([shuai.xu](https://github.com/shuai-xu)). +* Add a temporary known host for git over ssh. [#64569](https://github.com/ClickHouse/ClickHouse/pull/64569) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Cache first analysis result in ReadFromMergeTree. [#64579](https://github.com/ClickHouse/ClickHouse/pull/64579) ([Igor Nikonov](https://github.com/devcrafter)). +* Derive script parameters (labels) from the --repo/--from-repo - fix to not create backports for all release branches if backport for specific branch only. [#64603](https://github.com/ClickHouse/ClickHouse/pull/64603) ([Max K.](https://github.com/maxknv)). +* CI fixes. [#64605](https://github.com/ClickHouse/ClickHouse/pull/64605) ([Max K.](https://github.com/maxknv)). +* Double-checking [#59318](https://github.com/ClickHouse/ClickHouse/issues/59318) and docs for `Map`. [#64606](https://github.com/ClickHouse/ClickHouse/pull/64606) ([Robert Schulze](https://github.com/rschu1ze)). +* Update CHANGELOG.md. [#64609](https://github.com/ClickHouse/ClickHouse/pull/64609) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Tests: Convert numeric to symbolic error codes. [#64635](https://github.com/ClickHouse/ClickHouse/pull/64635) ([Robert Schulze](https://github.com/rschu1ze)).
+* Move NamedCollectionsFactory into a separate file. [#64642](https://github.com/ClickHouse/ClickHouse/pull/64642) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Shuffle tests for parallel execution. [#64646](https://github.com/ClickHouse/ClickHouse/pull/64646) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* CI: Do not upload binaries for special builds in PRs. [#64653](https://github.com/ClickHouse/ClickHouse/pull/64653) ([Max K.](https://github.com/maxknv)). +* Update changelog. [#64654](https://github.com/ClickHouse/ClickHouse/pull/64654) ([Robert Schulze](https://github.com/rschu1ze)). +* Parallel replicas: simple cleanup. [#64655](https://github.com/ClickHouse/ClickHouse/pull/64655) ([Igor Nikonov](https://github.com/devcrafter)). +* Be more graceful with existing tables with `inverted` indexes. [#64656](https://github.com/ClickHouse/ClickHouse/pull/64656) ([Robert Schulze](https://github.com/rschu1ze)). +* CI: Build Report Check to verify only enabled builds. [#64669](https://github.com/ClickHouse/ClickHouse/pull/64669) ([Max K.](https://github.com/maxknv)). +* Tests: Convert error numbers to symbolic error codes, pt. II. [#64670](https://github.com/ClickHouse/ClickHouse/pull/64670) ([Robert Schulze](https://github.com/rschu1ze)). +* Split query analyzer. [#64672](https://github.com/ClickHouse/ClickHouse/pull/64672) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* By the end of CI, CI_Running status must be SUCCESS or FAILURE never PENDING. [#64693](https://github.com/ClickHouse/ClickHouse/pull/64693) ([Max K.](https://github.com/maxknv)). +* The following list of merged PRs is not present in the release branch and was added to the changelog by mistake:. [#64704](https://github.com/ClickHouse/ClickHouse/pull/64704) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* CI: MergeQueue: add binary_release and unit tests. [#64705](https://github.com/ClickHouse/ClickHouse/pull/64705) ([Max K.](https://github.com/maxknv)). 
+* Fix to get first good enough GH token instead of getting and comparing all of them. [#64709](https://github.com/ClickHouse/ClickHouse/pull/64709) ([Max K.](https://github.com/maxknv)). +* Check for missing Upload ID in CreateMultipartUpload reply. [#64714](https://github.com/ClickHouse/ClickHouse/pull/64714) ([Michael Kolupaev](https://github.com/al13n321)). +* Update version_date.tsv and changelogs after v24.5.1.1763-stable. [#64715](https://github.com/ClickHouse/ClickHouse/pull/64715) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix (unreleased) `loop()` table function crashing on empty table name. [#64716](https://github.com/ClickHouse/ClickHouse/pull/64716) ([Michael Kolupaev](https://github.com/al13n321)). +* Update CHANGELOG.md. [#64730](https://github.com/ClickHouse/ClickHouse/pull/64730) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CI: ci.py refactoring. [#64734](https://github.com/ClickHouse/ClickHouse/pull/64734) ([Max K.](https://github.com/maxknv)). +* Return the explanation for session moved error. [#64747](https://github.com/ClickHouse/ClickHouse/pull/64747) ([Antonio Andelic](https://github.com/antonio2368)). +* Adjust the version_helper and script to a new release scheme. [#64759](https://github.com/ClickHouse/ClickHouse/pull/64759) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Do not try to write columns.txt if it does not exist for write-once storages. [#64762](https://github.com/ClickHouse/ClickHouse/pull/64762) ([Azat Khuzhin](https://github.com/azat)). +* Update 02482_load_parts_refcounts.sh. [#64765](https://github.com/ClickHouse/ClickHouse/pull/64765) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix crash with DISTINCT and window functions. [#64767](https://github.com/ClickHouse/ClickHouse/pull/64767) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix assert in IObjectStorageIteratorAsync. 
[#64770](https://github.com/ClickHouse/ClickHouse/pull/64770) ([Michael Kolupaev](https://github.com/al13n321)). +* Make table functions always report engine 'StorageProxy' in system.tables. [#64771](https://github.com/ClickHouse/ClickHouse/pull/64771) ([Michael Kolupaev](https://github.com/al13n321)). +* Ask about company name on GitHub. [#64774](https://github.com/ClickHouse/ClickHouse/pull/64774) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky tests about SQLite. [#64776](https://github.com/ClickHouse/ClickHouse/pull/64776) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove iostream debug helpers. [#64777](https://github.com/ClickHouse/ClickHouse/pull/64777) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove unnecessary comment. [#64785](https://github.com/ClickHouse/ClickHouse/pull/64785) ([Raúl Marín](https://github.com/Algunenano)). +* Follow-ups to some PRs. [#64787](https://github.com/ClickHouse/ClickHouse/pull/64787) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Attempt to fix 02228_merge_tree_insert_memory_usage.sql flakiness for s3. [#64800](https://github.com/ClickHouse/ClickHouse/pull/64800) ([Raúl Marín](https://github.com/Algunenano)). +* Add regression test for filter propagation through `Merge` engine. [#64806](https://github.com/ClickHouse/ClickHouse/pull/64806) ([Nikita Taranov](https://github.com/nickitat)). +* Migrate changelog.py to a descendant of fuzzywuzzy. [#64807](https://github.com/ClickHouse/ClickHouse/pull/64807) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* A follow-up for https://github.com/ClickHouse/ClickHouse/pull/64039 and [#64759](https://github.com/ClickHouse/ClickHouse/issues/64759). [#64813](https://github.com/ClickHouse/ClickHouse/pull/64813) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Make row order optimization non-experimental. 
[#64814](https://github.com/ClickHouse/ClickHouse/pull/64814) ([Robert Schulze](https://github.com/rschu1ze)). +* Didn't catch it at the time when all versions belonged to the current year. [#64817](https://github.com/ClickHouse/ClickHouse/pull/64817) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix clang-tidy build. [#64823](https://github.com/ClickHouse/ClickHouse/pull/64823) ([Robert Schulze](https://github.com/rschu1ze)). +* Sets all builds that we run tests on to normal build list. [#64824](https://github.com/ClickHouse/ClickHouse/pull/64824) ([Max K.](https://github.com/maxknv)). +* CI: fix CI await feature. [#64825](https://github.com/ClickHouse/ClickHouse/pull/64825) ([Max K.](https://github.com/maxknv)). +* Fix clang-tidy. [#64827](https://github.com/ClickHouse/ClickHouse/pull/64827) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Upload blob_storage_log from stateless tests. [#64843](https://github.com/ClickHouse/ClickHouse/pull/64843) ([alesapin](https://github.com/alesapin)). +* Follow-up to [#64349](https://github.com/ClickHouse/ClickHouse/issues/64349). [#64845](https://github.com/ClickHouse/ClickHouse/pull/64845) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Simplify handling of old 'inverted' indexes. [#64846](https://github.com/ClickHouse/ClickHouse/pull/64846) ([Robert Schulze](https://github.com/rschu1ze)). +* Use issue templates defined in YAML to provide a more user-friendly experience. [#64850](https://github.com/ClickHouse/ClickHouse/pull/64850) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Handle logs from rocksdb by ClickHouse internal logging. [#64856](https://github.com/ClickHouse/ClickHouse/pull/64856) ([Azat Khuzhin](https://github.com/azat)). +* Follow-up for https://github.com/ClickHouse/ClickHouse/pull/59357. [#64860](https://github.com/ClickHouse/ClickHouse/pull/64860) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* added mlock and mlockall to aspell-dict to be ignored.
[#64863](https://github.com/ClickHouse/ClickHouse/pull/64863) ([Ali](https://github.com/xogoodnow)). +* A tiny fix for fancy quotes. [#64883](https://github.com/ClickHouse/ClickHouse/pull/64883) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible loss of "Query was cancelled" message in client. [#64888](https://github.com/ClickHouse/ClickHouse/pull/64888) ([Azat Khuzhin](https://github.com/azat)). +* We accidentally lost the way to set `PR Check` failure at some point. [#64890](https://github.com/ClickHouse/ClickHouse/pull/64890) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix global trace collector. [#64896](https://github.com/ClickHouse/ClickHouse/pull/64896) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix test_mask_sensitive_info/test.py::test_create_table. [#64901](https://github.com/ClickHouse/ClickHouse/pull/64901) ([Azat Khuzhin](https://github.com/azat)). +* Update 03165_string_functions_with_token_text_indexes.sql. [#64903](https://github.com/ClickHouse/ClickHouse/pull/64903) ([Alexander Tokmakov](https://github.com/tavplubix)). +* When the branch is removed, it's impossible to get the diff by the labels. `print` in imported files spoils the `ipython` output. [#64904](https://github.com/ClickHouse/ClickHouse/pull/64904) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Disable transactions for unsupported storages even for materialized v…. [#64918](https://github.com/ClickHouse/ClickHouse/pull/64918) ([alesapin](https://github.com/alesapin)). +* additional log for cleanupDetachedTables. [#64919](https://github.com/ClickHouse/ClickHouse/pull/64919) ([Konstantin Morozov](https://github.com/k-morozov)). +* Fix tupleConcat of two empty tuples. This fixes [#64885](https://github.com/ClickHouse/ClickHouse/issues/64885). [#64923](https://github.com/ClickHouse/ClickHouse/pull/64923) ([Amos Bird](https://github.com/amosbird)). +* CI: Minor fixes in ci scripts. 
[#64950](https://github.com/ClickHouse/ClickHouse/pull/64950) ([Max K.](https://github.com/maxknv)). +* Fix error message (it was strange). [#64952](https://github.com/ClickHouse/ClickHouse/pull/64952) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update fmtlib version to 9.1.0. [#64959](https://github.com/ClickHouse/ClickHouse/pull/64959) ([Duc Canh Le](https://github.com/canhld94)). +* Test 02908_many_requests_to_system_replicas makes a lot of heavy requests and it overloads server if it's an ASAN build. [#64966](https://github.com/ClickHouse/ClickHouse/pull/64966) ([Alexander Gololobov](https://github.com/davenger)). +* Fix (unreleased) bug in short circuit evaluation. [#64967](https://github.com/ClickHouse/ClickHouse/pull/64967) ([Raúl Marín](https://github.com/Algunenano)). +* Update version_date.tsv and changelogs after v24.4.2.141-stable. [#64968](https://github.com/ClickHouse/ClickHouse/pull/64968) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix `test_attach_partition_using_copy`. [#64977](https://github.com/ClickHouse/ClickHouse/pull/64977) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Faster processing of scheduler queue activations. [#64985](https://github.com/ClickHouse/ClickHouse/pull/64985) ([Sergei Trifonov](https://github.com/serxa)). +* CI: Fix nightly workflow. [#64987](https://github.com/ClickHouse/ClickHouse/pull/64987) ([Max K.](https://github.com/maxknv)). +* Fix innocuous data race in detectLanguage. [#64988](https://github.com/ClickHouse/ClickHouse/pull/64988) ([Raúl Marín](https://github.com/Algunenano)). +* CI: Builds in CI settings. [#64994](https://github.com/ClickHouse/ClickHouse/pull/64994) ([Max K.](https://github.com/maxknv)). +* REVERTED. [#65009](https://github.com/ClickHouse/ClickHouse/pull/65009) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* CI: Fix backports. [#65010](https://github.com/ClickHouse/ClickHouse/pull/65010) ([Max K.](https://github.com/maxknv)). 
+* Try fix 03143_prewhere_profile_events. [#65014](https://github.com/ClickHouse/ClickHouse/pull/65014) ([Nikita Taranov](https://github.com/nickitat)). +* Fix 03165_string_functions_with_token_text_indexes. [#65018](https://github.com/ClickHouse/ClickHouse/pull/65018) ([Julia Kartseva](https://github.com/jkartseva)). +* This change was reverted. [#65028](https://github.com/ClickHouse/ClickHouse/pull/65028) ([Sergei Trifonov](https://github.com/serxa)). +* Bump googletest to latest HEAD. [#65038](https://github.com/ClickHouse/ClickHouse/pull/65038) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve comment about AsynchronousMetrics. [#65040](https://github.com/ClickHouse/ClickHouse/pull/65040) ([Antonio Andelic](https://github.com/antonio2368)). +* CI: Remove fuzzer build from normal CI run (bugfix). [#65041](https://github.com/ClickHouse/ClickHouse/pull/65041) ([Max K.](https://github.com/maxknv)). +* CI config refactoring. [#65045](https://github.com/ClickHouse/ClickHouse/pull/65045) ([Max K.](https://github.com/maxknv)). +* Bump abseil to latest HEAD. [#65048](https://github.com/ClickHouse/ClickHouse/pull/65048) ([Robert Schulze](https://github.com/rschu1ze)). +* Capture weak_ptr of ContextAccess for safety. [#65051](https://github.com/ClickHouse/ClickHouse/pull/65051) ([Alexander Gololobov](https://github.com/davenger)). +* Stateless tests: add test for SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT. [#65056](https://github.com/ClickHouse/ClickHouse/pull/65056) ([Nikita Fomichev](https://github.com/fm4v)). +* Increase timeout in wait_for_all_mutations. [#65058](https://github.com/ClickHouse/ClickHouse/pull/65058) ([Alexander Gololobov](https://github.com/davenger)). +* Tests for _time virtual column in file alike storages. [#65064](https://github.com/ClickHouse/ClickHouse/pull/65064) ([Ilya Golshtein](https://github.com/ilejn)). +* Update odbc-bridge.md. 
[#65099](https://github.com/ClickHouse/ClickHouse/pull/65099) ([Alexander Gololobov](https://github.com/davenger)). +* Small fix for 02340_parts_refcnt_mergetree. [#65105](https://github.com/ClickHouse/ClickHouse/pull/65105) ([Nikita Taranov](https://github.com/nickitat)). +* Re-enable OpenSSL session caching. [#65111](https://github.com/ClickHouse/ClickHouse/pull/65111) ([Robert Schulze](https://github.com/rschu1ze)). +* Update test_replicated_database/test.py. [#65112](https://github.com/ClickHouse/ClickHouse/pull/65112) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix false positives leaky memory warnings in OpenSSL. [#65125](https://github.com/ClickHouse/ClickHouse/pull/65125) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix `Initiator received more initial requests than there are replicas` with `loop` engine. [#65133](https://github.com/ClickHouse/ClickHouse/pull/65133) ([Nikita Taranov](https://github.com/nickitat)). +* Fix 'Tasks in BackgroundSchedulePool cannot throw' caused by MergeTreeData::loadUnexpectedDataParts(). [#65135](https://github.com/ClickHouse/ClickHouse/pull/65135) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix bad error message. [#65137](https://github.com/ClickHouse/ClickHouse/pull/65137) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Just fixing flaky unit tests. [#65152](https://github.com/ClickHouse/ClickHouse/pull/65152) ([Sema Checherinda](https://github.com/CheSema)). +* This change was reverted. [#65164](https://github.com/ClickHouse/ClickHouse/pull/65164) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Ensure submodules are named consistently. [#65167](https://github.com/ClickHouse/ClickHouse/pull/65167) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove obsolete fix from aws submodule. [#65168](https://github.com/ClickHouse/ClickHouse/pull/65168) ([Robert Schulze](https://github.com/rschu1ze)). +* CI: Fix not-merged cherry-picks for backports. 
[#65181](https://github.com/ClickHouse/ClickHouse/pull/65181) ([Max K.](https://github.com/maxknv)). +* Add an assertion in ReplicatedMergeTreeQueue. [#65184](https://github.com/ClickHouse/ClickHouse/pull/65184) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix bug in unreleased code. [#65185](https://github.com/ClickHouse/ClickHouse/pull/65185) ([Raúl Marín](https://github.com/Algunenano)). +* Fix docs for skipping-indexes.md. [#65194](https://github.com/ClickHouse/ClickHouse/pull/65194) ([morning-color](https://github.com/morning-color)). +* Fix the descriptions of some server settings. [#65200](https://github.com/ClickHouse/ClickHouse/pull/65200) ([Raúl Marín](https://github.com/Algunenano)). +* Fix issue after [#64813](https://github.com/ClickHouse/ClickHouse/issues/64813) with broken search in the changelog, and missing zstd in a style-check image. [#65202](https://github.com/ClickHouse/ClickHouse/pull/65202) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix bug in unreleased code. [#65203](https://github.com/ClickHouse/ClickHouse/pull/65203) ([Raúl Marín](https://github.com/Algunenano)). +* Add test prewhere merge. [#65207](https://github.com/ClickHouse/ClickHouse/pull/65207) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Sync ProfileEvents.h. [#65208](https://github.com/ClickHouse/ClickHouse/pull/65208) ([Kseniia Sumarokova](https://github.com/kssenii)). +* FinishCheck to set failure if workflow failed. [#65228](https://github.com/ClickHouse/ClickHouse/pull/65228) ([Max K.](https://github.com/maxknv)). +* Update version_date.tsv and changelogs after v24.3.4.147-lts. [#65235](https://github.com/ClickHouse/ClickHouse/pull/65235) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v24.5.3.5-stable. [#65240](https://github.com/ClickHouse/ClickHouse/pull/65240) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* Fails sometimes for debug build https://s3.amazonaws.com/clickhouse-test-reports/0/af6afd904316bfb771737faa147ce8aea72dd705/stateless_tests__debug__[4_5].html. [#65245](https://github.com/ClickHouse/ClickHouse/pull/65245) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix libunwind in CI. [#65247](https://github.com/ClickHouse/ClickHouse/pull/65247) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CI: Do not skip FinishCheck in Merge Queue. [#65249](https://github.com/ClickHouse/ClickHouse/pull/65249) ([Max K.](https://github.com/maxknv)). +* Add a test just in case. [#65271](https://github.com/ClickHouse/ClickHouse/pull/65271) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable 02581_share_big_sets_between_multiple_mutations_tasks_long in coverage run. [#65295](https://github.com/ClickHouse/ClickHouse/pull/65295) ([Alexander Gololobov](https://github.com/davenger)). +* Update version_date.tsv and changelogs after v23.8.15.35-lts. [#65300](https://github.com/ClickHouse/ClickHouse/pull/65300) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* mute test test_query_is_canceled_with_inf_retries. [#65301](https://github.com/ClickHouse/ClickHouse/pull/65301) ([Sema Checherinda](https://github.com/CheSema)). +* Fix silly typo that caused wrong tags messages. [#65307](https://github.com/ClickHouse/ClickHouse/pull/65307) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Save server data for failed stateless tests. [#65309](https://github.com/ClickHouse/ClickHouse/pull/65309) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix 01246_buffer_flush flakiness (by tuning timeouts). [#65310](https://github.com/ClickHouse/ClickHouse/pull/65310) ([Azat Khuzhin](https://github.com/azat)). +* Remove outdated override in stress tests. [#65323](https://github.com/ClickHouse/ClickHouse/pull/65323) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad code in `system.session_log`. 
[#65332](https://github.com/ClickHouse/ClickHouse/pull/65332) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* add tests for 'bloom filter index with map'. [#65333](https://github.com/ClickHouse/ClickHouse/pull/65333) ([iceFireser](https://github.com/iceFireser)). +* Fix crash in 03036_dynamic_read_subcolumns. [#65341](https://github.com/ClickHouse/ClickHouse/pull/65341) ([Kruglov Pavel](https://github.com/Avogar)). +* Move tests 02942_variant_cast and 02944_variant_as_common_type to analyzer_tech_debt.txt. [#65342](https://github.com/ClickHouse/ClickHouse/pull/65342) ([Kruglov Pavel](https://github.com/Avogar)). +* REVERTED. [#65384](https://github.com/ClickHouse/ClickHouse/pull/65384) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* CI: Add Non-blocking (Woolen wolfdog) CI mode. [#65385](https://github.com/ClickHouse/ClickHouse/pull/65385) ([Max K.](https://github.com/maxknv)). +* Fix compatibility release check. [#65394](https://github.com/ClickHouse/ClickHouse/pull/65394) ([Alexey Katsman](https://github.com/alexkats)). +* Move a leaksan suppression from Poco into OpenSSL. [#65396](https://github.com/ClickHouse/ClickHouse/pull/65396) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix tidy build. [#65415](https://github.com/ClickHouse/ClickHouse/pull/65415) ([Sergei Trifonov](https://github.com/serxa)). +* Remove Tests dependency on Builds_2. No tests depend on Builds_2. [#65416](https://github.com/ClickHouse/ClickHouse/pull/65416) ([Max K.](https://github.com/maxknv)). +* CI: PR workflow dependencies fix. [#65442](https://github.com/ClickHouse/ClickHouse/pull/65442) ([Max K.](https://github.com/maxknv)). +* Fix test_storage_s3_queue/test.py::test_max_set_age. [#65452](https://github.com/ClickHouse/ClickHouse/pull/65452) ([Kseniia Sumarokova](https://github.com/kssenii)). +* CI: Rename A Sync status. [#65456](https://github.com/ClickHouse/ClickHouse/pull/65456) ([Max K.](https://github.com/maxknv)). +* CI: Rename sync status.
[#65464](https://github.com/ClickHouse/ClickHouse/pull/65464) ([Max K.](https://github.com/maxknv)). +* This change was reverted. [#65466](https://github.com/ClickHouse/ClickHouse/pull/65466) ([Sergei Trifonov](https://github.com/serxa)). +* Remove a feature that wasn't part of any release yet. [#65480](https://github.com/ClickHouse/ClickHouse/pull/65480) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#65657](https://github.com/ClickHouse/ClickHouse/issues/65657): Fix of `PlanSquashingTransform`: pipeline stuck. [#65487](https://github.com/ClickHouse/ClickHouse/pull/65487) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Backported in [#65504](https://github.com/ClickHouse/ClickHouse/issues/65504): Fix bad test `02922_deduplication_with_zero_copy`. [#65492](https://github.com/ClickHouse/ClickHouse/pull/65492) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#65591](https://github.com/ClickHouse/ClickHouse/issues/65591): Setting `uniform_snowflake_conversion_functions` (not in any release yet) was replaced by setting `allow_deprecated_snowflake_conversion_functions`. The latter controls if the legacy snowflake conversion functions are available (by default, they are not). [#65522](https://github.com/ClickHouse/ClickHouse/pull/65522) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#65759](https://github.com/ClickHouse/ClickHouse/issues/65759): Renames Build report jobs. [#65554](https://github.com/ClickHouse/ClickHouse/pull/65554) ([Max K.](https://github.com/maxknv)). +* Backported in [#65773](https://github.com/ClickHouse/ClickHouse/issues/65773): `base64En/Decode64Url` --> `base64En/Decode64URL`. [#65760](https://github.com/ClickHouse/ClickHouse/pull/65760) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#65805](https://github.com/ClickHouse/ClickHouse/issues/65805): CI: Fix for Builds report job in backports and releases.
[#65774](https://github.com/ClickHouse/ClickHouse/pull/65774) ([Max K.](https://github.com/maxknv)). + diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index c348eb5ca07..c283cfbf4c2 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -91,6 +91,9 @@ cd ./utils/check-style # Check python type hinting with mypy ./check-mypy +# Check python with flake8 +./check-flake8 + # Check code with codespell ./check-typos diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index ec5760541e8..0a1fe58b16f 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -267,7 +267,7 @@ A pull request can be created even if the work is not completed yet. In this cas Testing will commence as soon as ClickHouse employees label your PR with a tag “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour. -The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “ClickHouse build check” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear). +The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “Builds” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear). Most probably some of the builds will fail at first times. 
This is due to the fact that we check builds both with gcc as well as with clang, with almost all of existing warnings (always with the `-Werror` flag) enabled for clang. On that same page, you can find all of the build logs so that you do not have to build ClickHouse in all of the possible ways. diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index bbc7dac0a2a..269995a1a96 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -28,7 +28,7 @@ run, for example, the test `01428_hash_set_nan_key`, change to the repository folder and run the following command: ``` -PATH=$PATH: tests/clickhouse-test 01428_hash_set_nan_key +PATH=:$PATH tests/clickhouse-test 01428_hash_set_nan_key ``` Test results (`stderr` and `stdout`) are written to files `01428_hash_set_nan_key.[stderr|stdout]` which @@ -229,6 +229,10 @@ For production builds, clang is used, but we also test make gcc builds. For deve ## Sanitizers {#sanitizers} +:::note +If the process (ClickHouse server or client) crashes at startup when running it locally, you might need to disable address space layout randomization: `sudo sysctl kernel.randomize_va_space=0` +::: + ### Address sanitizer We run functional, integration, stress and unit tests under ASan on per-commit basis. diff --git a/docs/en/engines/table-engines/integrations/azureBlobStorage.md b/docs/en/engines/table-engines/integrations/azureBlobStorage.md index 0843ff1ac47..bdf96832e9d 100644 --- a/docs/en/engines/table-engines/integrations/azureBlobStorage.md +++ b/docs/en/engines/table-engines/integrations/azureBlobStorage.md @@ -54,6 +54,16 @@ SELECT * FROM test_table; - `_path` — Path to the file. Type: `LowCardinalty(String)`. - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. 
If the time is unknown, the value is `NULL`. + +## Authentication + +Currently there are 3 ways to authenticate: +- `Managed Identity` - Can be used by providing an `endpoint`, `connection_string` or `storage_account_url`. +- `SAS Token` - Can be used by providing an `endpoint`, `connection_string` or `storage_account_url`. It is identified by presence of '?' in the url. +- `Workload Identity` - Can be used by providing an `endpoint` or `storage_account_url`. If `use_workload_identity` parameter is set in config, ([workload identity](https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/identity/azure-identity#authenticate-azure-hosted-applications)) is used for authentication. + + ## See also diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 2749fa7e479..c9df713231a 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -235,6 +235,7 @@ libhdfs3 support HDFS namenode HA. - `_path` — Path to the file. Type: `LowCardinalty(String)`. - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. 
## Storage Settings {#storage-settings} diff --git a/docs/en/engines/table-engines/integrations/iceberg.md b/docs/en/engines/table-engines/integrations/iceberg.md index 9d6395f73ac..21fdbc0b1a5 100644 --- a/docs/en/engines/table-engines/integrations/iceberg.md +++ b/docs/en/engines/table-engines/integrations/iceberg.md @@ -37,7 +37,7 @@ Using named collections: http://test.s3.amazonaws.com/clickhouse-bucket/ - test + test test diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md index f87e8da8b5b..5bb3bc752f5 100644 --- a/docs/en/engines/table-engines/integrations/mongodb.md +++ b/docs/en/engines/table-engines/integrations/mongodb.md @@ -34,10 +34,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name - `options` — MongoDB connection string options (optional parameter). :::tip -If you are using the MongoDB Atlas cloud offering please add these options: +If you are using the MongoDB Atlas cloud offering: ``` -'connectTimeoutMS=10000&ssl=true&authSource=admin' +- connection url can be obtained from 'Atlas SQL' option +- use options: 'connectTimeoutMS=10000&ssl=true&authSource=admin' ``` ::: diff --git a/docs/en/engines/table-engines/integrations/nats.md b/docs/en/engines/table-engines/integrations/nats.md index 9f7409a6893..78ce537224c 100644 --- a/docs/en/engines/table-engines/integrations/nats.md +++ b/docs/en/engines/table-engines/integrations/nats.md @@ -58,7 +58,7 @@ Optional parameters: - `nats_max_reconnect` – Maximum amount of reconnection attempts per try to connect to NATS. Default: `5`. - `nats_reconnect_wait` – Amount of time in milliseconds to sleep between each reconnect attempt. Default: `5000`. - `nats_server_list` - Server list for connection. Can be specified to connect to NATS cluster. -- `nats_skip_broken_messages` - NATS message parser tolerance to schema-incompatible messages per block. Default: `0`. 
If `nats_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data). +- `nats_skip_broken_messages` - NATS message parser tolerance to schema-incompatible messages per block. Default: `0`. If `nats_skip_broken_messages = N` then the engine skips *N* NATS messages that cannot be parsed (a message equals a row of data). - `nats_max_block_size` - Number of row collected by poll(s) for flushing data from NATS. Default: [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size). - `nats_flush_interval_ms` - Timeout for flushing data read from NATS. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms). - `nats_username` - NATS username. @@ -67,7 +67,7 @@ Optional parameters: - `nats_credential_file` - Path to a NATS credentials file. - `nats_startup_connect_tries` - Number of connect tries at startup. Default: `5`. - `nats_max_rows_per_message` — The maximum number of rows written in one NATS message for row-based formats. (default : `1`). -- `nats_handle_error_mode` — How to handle errors for RabbitMQ engine. Possible values: default (the exception will be thrown if we fail to parse a message), stream (the exception message and raw message will be saved in virtual columns `_error` and `_raw_message`). +- `nats_handle_error_mode` — How to handle errors for NATS engine. Possible values: default (the exception will be thrown if we fail to parse a message), stream (the exception message and raw message will be saved in virtual columns `_error` and `_raw_message`). 
SSL connection: diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index cb1da1c8e68..93f4a187656 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -53,14 +53,14 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da This example uses the [docker compose recipe](https://github.com/ClickHouse/examples/tree/5fdc6ff72f4e5137e23ea075c88d3f44b0202490/docker-compose-recipes/recipes/ch-and-minio-S3), which integrates ClickHouse and MinIO. You should be able to reproduce the same queries using S3 by replacing the endpoint and authentication values. -Notice that the S3 endpoint in the `ENGINE` configuration uses the parameter token `{_partition_id}` as part of the S3 object (filename), and that the SELECT queries select against those resulting object names (e.g., `test_3.csv`). +Notice that the S3 endpoint in the `ENGINE` configuration uses the parameter token `{_partition_id}` as part of the S3 object (filename), and that the SELECT queries select against those resulting object names (e.g., `test_3.csv`). :::note As shown in the example, querying from S3 tables that are partitioned is not directly supported at this time, but can be accomplished by querying the individual partitions using the S3 table function. -The primary use-case for writing +The primary use-case for writing partitioned data in S3 is to enable transferring that data into another ClickHouse system (for example, moving from on-prem systems to ClickHouse Cloud). 
Because ClickHouse datasets are often very large, and network @@ -78,9 +78,9 @@ CREATE TABLE p ) ENGINE = S3( # highlight-next-line - 'http://minio:10000/clickhouse//test_{_partition_id}.csv', - 'minioadmin', - 'minioadminpassword', + 'http://minio:10000/clickhouse//test_{_partition_id}.csv', + 'minioadmin', + 'minioadminpassword', 'CSV') PARTITION BY column3 ``` @@ -145,6 +145,7 @@ Code: 48. DB::Exception: Received from localhost:9000. DB::Exception: Reading fr - `_path` — Path to the file. Type: `LowCardinalty(String)`. - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns). diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index aa7fa512480..11181703645 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -13,7 +13,7 @@ This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ec CREATE TABLE s3_queue_engine_table (name String, value UInt32) ENGINE = S3Queue(path, [NOSIGN, | aws_access_key_id, aws_secret_access_key,] format, [compression]) [SETTINGS] - [mode = 'unordered',] + [mode = '',] [after_processing = 'keep',] [keeper_path = '',] [s3queue_loading_retries = 0,] @@ -28,6 +28,8 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32) [s3queue_cleanup_interval_max_ms = 30000,] ``` +Starting with `24.7` settings without `s3queue_` prefix are also supported. + **Engine parameters** - `path` — Bucket url with path to file. 
Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path). @@ -75,7 +77,7 @@ Possible values: - unordered — With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKeeper. - ordered — With ordered mode, only the max name of the successfully consumed file, and the names of files that will be retried after unsuccessful loading attempt are being stored in ZooKeeper. -Default value: `unordered`. +Default value: `ordered` in versions before 24.6. Starting with 24.6 there is no default value; the setting must be specified explicitly. For tables created on earlier versions the default value will remain `Ordered` for compatibility. ### after_processing {#after_processing} @@ -181,6 +183,10 @@ For 'Ordered' mode. Defines a maximum boundary for reschedule interval for a bac Default value: `30000`. +### s3queue_buckets {#buckets} + +For 'Ordered' mode. Available since `24.6`. If there are several replicas of S3Queue table, each working with the same metadata directory in keeper, the value of `s3queue_buckets` needs to be equal to at least the number of replicas. If `s3queue_processing_threads` setting is used as well, it makes sense to increase the value of `s3queue_buckets` setting even further, as it defines the actual parallelism of `S3Queue` processing. + ## S3-related Settings {#s3-settings} Engine supports all s3 related settings. For more information about S3 settings see [here](../../../engines/table-engines/integrations/s3.md). @@ -267,7 +273,7 @@ For introspection use `system.s3queue` stateless table and `system.s3queue_log` `exception` String ) ENGINE = SystemS3Queue -COMMENT 'SYSTEM TABLE is built on the fly.' │ +COMMENT 'Contains in-memory state of S3Queue metadata and currently processed rows per file.'
│ └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 689c05a24af..f0c4e1b0e34 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -6,41 +6,32 @@ sidebar_label: MergeTree # MergeTree -The `MergeTree` engine and other engines of this family (`*MergeTree`) are the most commonly used and most robust ClickHouse table engines. +The `MergeTree` engine and other engines of the `MergeTree` family (e.g. `ReplacingMergeTree`, `AggregatingMergeTree` ) are the most commonly used and most robust table engines in ClickHouse. -Engines in the `MergeTree` family are designed for inserting a very large amount of data into a table. The data is quickly written to the table part by part, then rules are applied for merging the parts in the background. This method is much more efficient than continually rewriting the data in storage during insert. +`MergeTree`-family table engines are designed for high data ingest rates and huge data volumes. +Insert operations create table parts which are merged by a background process with other table parts. -Main features: +Main features of `MergeTree`-family table engines. -- Stores data sorted by primary key. +- The table's primary key determines the sort order within each table part (clustered index). The primary key also does not reference individual rows but blocks of 8192 rows called granules. This makes primary keys of huge data sets small enough to remain loaded in main memory, while still providing fast access to on-disk data. - This allows you to create a small sparse index that helps find data faster. 
+- Tables can be partitioned using an arbitrary partition expression. Partition pruning ensures partitions are omitted from reading when the query allows it. -- Partitions can be used if the [partitioning key](/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified. +- Data can be replicated across multiple cluster nodes for high availability, failover, and zero downtime upgrades. See [Data replication](/docs/en/engines/table-engines/mergetree-family/replication.md). - ClickHouse supports certain operations with partitions that are more efficient than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query. +- `MergeTree` table engines support various statistics kinds and sampling methods to help query optimization. -- Data replication support. - - The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](/docs/en/engines/table-engines/mergetree-family/replication.md). - -- Data sampling support. - - If necessary, you can set the data sampling method in the table. - -:::info -The [Merge](/docs/en/engines/table-engines/special/merge.md/#merge) engine does not belong to the `*MergeTree` family. +:::note +Despite a similar name, the [Merge](/docs/en/engines/table-engines/special/merge.md/#merge) engine is different from `*MergeTree` engines. ::: -If you need to update rows frequently, we recommend using the [`ReplacingMergeTree`](/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md) table engine. Using `ALTER TABLE my_table UPDATE` to update rows triggers a mutation, which causes parts to be re-written and uses IO/resources. With `ReplacingMergeTree`, you can simply insert the updated rows and the old rows will be replaced according to the table sorting key. 
- -## Creating a Table {#table_engine-mergetree-creating-a-table} +## Creating Tables {#table_engine-mergetree-creating-a-table} ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTIC(stat1)] [TTL expr1] [PRIMARY KEY] [SETTINGS (name = value, ...)], - name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTIC(stat2)] [TTL expr2] [PRIMARY KEY] [SETTINGS (name = value, ...)], + name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTICS(stat1)] [TTL expr1] [PRIMARY KEY] [SETTINGS (name = value, ...)], + name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTICS(stat2)] [TTL expr2] [PRIMARY KEY] [SETTINGS (name = value, ...)], ... INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1], INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2], @@ -59,23 +50,24 @@ ORDER BY expr [SETTINGS name = value, ...] ``` -For a description of parameters, see the [CREATE query description](/docs/en/sql-reference/statements/create/table.md). +For a detailed description of the parameters, see the [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md) statement ### Query Clauses {#mergetree-query-clauses} #### ENGINE -`ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters. +`ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine has no parameters. #### ORDER_BY `ORDER BY` — The sorting key. -A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`. +A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID + 1, EventDate)`. 
-ClickHouse uses the sorting key as a primary key if the primary key is not defined explicitly by the `PRIMARY KEY` clause. +If no primary key is defined (i.e. `PRIMARY KEY` was not specified), ClickHouse uses the sorting key as the primary key. -Use the `ORDER BY tuple()` syntax, if you do not need sorting, or set `create_table_empty_primary_key_by_default` to `true` to use the `ORDER BY tuple()` syntax by default. See [Selecting the Primary Key](#selecting-the-primary-key). +If no sorting is required, you can use syntax `ORDER BY tuple()`. +Alternatively, if setting `create_table_empty_primary_key_by_default` is enabled, `ORDER BY tuple()` is implicitly added to `CREATE TABLE` statements. See [Selecting a Primary Key](#selecting-a-primary-key). #### PARTITION BY @@ -87,100 +79,32 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional. -By default the primary key is the same as the sorting key (which is specified by the `ORDER BY` clause). Thus in most cases it is unnecessary to specify a separate `PRIMARY KEY` clause. +Specifying a sorting key (using `ORDER BY` clause) implicitly specifies a primary key. +It is usually not necessary to specify the primary key in addition to the sorting key. #### SAMPLE BY -`SAMPLE BY` — An expression for sampling. Optional. +`SAMPLE BY` — A sampling expression. Optional. -If a sampling expression is used, the primary key must contain it. The result of a sampling expression must be an unsigned integer. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. +If specified, it must be contained in the primary key. +The sampling expression must result in an unsigned integer. + +Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.
#### TTL -`TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional. +`TTL` — A list of rules that specify the storage duration of rows and the logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional. -Expression must have one `Date` or `DateTime` column as a result. Example: -``` -TTL date + INTERVAL 1 DAY -``` +Expression must result in a `Date` or `DateTime`, e.g. `TTL date + INTERVAL 1 DAY`. Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`), or aggregating values in expired rows. Default type of the rule is removal (`DELETE`). List of multiple rules can be specified, but there should be no more than one `DELETE` rule. + For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl) -### SETTINGS -Additional parameters that control the behavior of the `MergeTree` (optional): +#### SETTINGS -#### index_granularity - -`index_granularity` — Maximum number of data rows between the marks of an index. Default value: 8192. See [Data Storage](#mergetree-data-storage). - -#### index_granularity_bytes - -`index_granularity_bytes` — Maximum size of data granules in bytes. Default value: 10Mb. To restrict the granule size only by number of rows, set to 0 (not recommended). See [Data Storage](#mergetree-data-storage). - -#### min_index_granularity_bytes - -`min_index_granularity_bytes` — Min allowed size of data granules in bytes. Default value: 1024b. To provide a safeguard against accidentally creating tables with very low index_granularity_bytes. See [Data Storage](#mergetree-data-storage). 
- -#### enable_mixed_granularity_parts - -`enable_mixed_granularity_parts` — Enables or disables transitioning to control the granule size with the `index_granularity_bytes` setting. Before version 19.11, there was only the `index_granularity` setting for restricting granule size. The `index_granularity_bytes` setting improves ClickHouse performance when selecting data from tables with big rows (tens and hundreds of megabytes). If you have tables with big rows, you can enable this setting for the tables to improve the efficiency of `SELECT` queries. - -#### use_minimalistic_part_header_in_zookeeper - -`use_minimalistic_part_header_in_zookeeper` — Storage method of the data parts headers in ZooKeeper. If `use_minimalistic_part_header_in_zookeeper=1`, then ZooKeeper stores less data. For more information, see the [setting description](/docs/en/operations/server-configuration-parameters/settings.md/#server-settings-use_minimalistic_part_header_in_zookeeper) in “Server configuration parameters”. - -#### min_merge_bytes_to_use_direct_io - -`min_merge_bytes_to_use_direct_io` — The minimum data volume for merge operation that is required for using direct I/O access to the storage disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse reads and writes the data to the storage disk using the direct I/O interface (`O_DIRECT` option). If `min_merge_bytes_to_use_direct_io = 0`, then direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes. - -#### merge_with_ttl_timeout - -`merge_with_ttl_timeout` — Minimum delay in seconds before repeating a merge with delete TTL. Default value: `14400` seconds (4 hours). -#### merge_with_recompression_ttl_timeout - -`merge_with_recompression_ttl_timeout` — Minimum delay in seconds before repeating a merge with recompression TTL. Default value: `14400` seconds (4 hours). 
- -#### try_fetch_recompressed_part_timeout - -`try_fetch_recompressed_part_timeout` — Timeout (in seconds) before starting merge with recompression. During this time ClickHouse tries to fetch recompressed part from replica which assigned this merge with recompression. Default value: `7200` seconds (2 hours). - -#### write_final_mark - -`write_final_mark` — Enables or disables writing the final index mark at the end of data part (after the last byte). Default value: 1. Don’t turn it off. - -#### merge_max_block_size - -`merge_max_block_size` — Maximum number of rows in block for merge operations. Default value: 8192. - -#### storage_policy - -`storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes). - -#### min_bytes_for_wide_part - -`min_bytes_for_wide_part`, `min_rows_for_wide_part` — Minimum number of bytes/rows in a data part that can be stored in `Wide` format. You can set one, both or none of these settings. See [Data Storage](#mergetree-data-storage). - -#### max_parts_in_total - -`max_parts_in_total` — Maximum number of parts in all partitions. - -#### max_compress_block_size - -`max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see [max_compress_block_size](/docs/en/operations/settings/settings.md/#max-compress-block-size) setting). The value specified when table is created overrides the global value for this setting. - -#### min_compress_block_size - -`min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](/docs/en/operations/settings/settings.md/#min-compress-block-size) setting). The value specified when table is created overrides the global value for this setting. 
- -#### max_partitions_to_read - -`max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. You can also specify setting [max_partitions_to_read](/docs/en/operations/settings/merge-tree-settings.md/#max-partitions-to-read) in the global setting. - -#### allow_experimental_optimized_row_order - -`allow_experimental_optimized_row_order` - Experimental. Enables the optimization of the row order during inserts to improve the compressability of the data for compression codecs (e.g. LZ4). Analyzes and reorders the data, and thus increases the CPU overhead of inserts. +See [MergeTree Settings](../../../operations/settings/merge-tree-settings.md). **Example of Sections Setting** @@ -270,7 +194,7 @@ ClickHouse does not require a unique primary key. You can insert multiple rows w You can use `Nullable`-typed expressions in the `PRIMARY KEY` and `ORDER BY` clauses but it is strongly discouraged. To allow this feature, turn on the [allow_nullable_key](/docs/en/operations/settings/settings.md/#allow-nullable-key) setting. The [NULLS_LAST](/docs/en/sql-reference/statements/select/order-by.md/#sorting-of-special-values) principle applies for `NULL` values in the `ORDER BY` clause. -### Selecting the Primary Key {#selecting-the-primary-key} +### Selecting a Primary Key {#selecting-a-primary-key} The number of columns in the primary key is not explicitly limited. Depending on the data structure, you can include more or fewer columns in the primary key. This may: @@ -1043,12 +967,12 @@ ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [ ## Column Statistics (Experimental) {#column-statistics} -The statistic declaration is in the columns section of the `CREATE` query for tables from the `*MergeTree*` Family when we enable `set allow_experimental_statistic = 1`. 
+The statistics declaration is in the columns section of the `CREATE` query for tables from the `*MergeTree*` Family when we enable `set allow_experimental_statistics = 1`. ``` sql CREATE TABLE tab ( - a Int64 STATISTIC(tdigest), + a Int64 STATISTICS(TDigest, Uniq), b Float64 ) ENGINE = MergeTree @@ -1058,19 +982,23 @@ ORDER BY a We can also manipulate statistics with `ALTER` statements. ```sql -ALTER TABLE tab ADD STATISTIC b TYPE tdigest; -ALTER TABLE tab DROP STATISTIC a TYPE tdigest; +ALTER TABLE tab ADD STATISTICS b TYPE TDigest, Uniq; +ALTER TABLE tab DROP STATISTICS a; ``` -These lightweight statistics aggregate information about distribution of values in columns. -They can be used for query optimization when we enable `set allow_statistic_optimize = 1`. +These lightweight statistics aggregate information about distribution of values in columns. Statistics are stored in every part and updated when every insert comes. +They can be used for prewhere optimization only if we enable `set allow_statistics_optimize = 1`. #### Available Types of Column Statistics {#available-types-of-column-statistics} -- `tdigest` +- `TDigest` Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch. +- `Uniq` + + Estimate the number of distinct values of a column by HyperLogLog. + ## Column-level Settings {#column-level-settings} Certain MergeTree settings can be override at column level: diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index 0d422f64762..957b18b5305 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -102,6 +102,7 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da - `_path` — Path to the file. Type: `LowCardinalty(String)`. - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. 
If the size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. ## Settings {#settings} diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index f6183a779ae..c906830d0e9 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -108,6 +108,7 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da - `_path` — Path to the `URL`. Type: `LowCardinalty(String)`. - `_file` — Resource name of the `URL`. Type: `LowCardinalty(String)`. - `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. ## Storage Settings {#storage-settings} diff --git a/docs/en/getting-started/example-datasets/images/stackoverflow.png b/docs/en/getting-started/example-datasets/images/stackoverflow.png new file mode 100644 index 00000000000..f31acdc8cc3 Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/stackoverflow.png differ diff --git a/docs/en/getting-started/example-datasets/stackoverflow.md b/docs/en/getting-started/example-datasets/stackoverflow.md new file mode 100644 index 00000000000..e982a3c3dfc --- /dev/null +++ b/docs/en/getting-started/example-datasets/stackoverflow.md @@ -0,0 +1,394 @@ +--- +slug: /en/getting-started/example-datasets/stackoverflow +sidebar_label: Stack Overflow +sidebar_position: 1 +description: Analyzing Stack Overflow data with ClickHouse +--- + +# Analyzing Stack Overflow data with ClickHouse + +This dataset contains every `Post`, `User`, `Vote`, `Comment`, `Badge`, `PostHistory`, and `PostLink` that has occurred on Stack Overflow. 
+ +Users can either download pre-prepared Parquet versions of the data, containing every post up to April 2024, or download the latest data in XML format and load it. Stack Overflow provides updates to this data periodically - historically every 3 months. + +The following diagram shows the schema for the available tables assuming Parquet format. + +![Stack Overflow schema](./images/stackoverflow.png) + +A description of the schema of this data can be found [here](https://meta.stackexchange.com/questions/2677/database-schema-documentation-for-the-public-data-dump-and-sede). + +## Pre-prepared data + +We provide a copy of this data in Parquet format, up to date as of April 2024. While small for ClickHouse with respect to the number of rows (60 million posts), this dataset contains significant volumes of text and large String columns. + +```sql +CREATE DATABASE stackoverflow +``` + +The following timings are for a 96 GiB, 24 vCPU ClickHouse Cloud cluster located in `eu-west-2`. The dataset is located in `eu-west-3`. 
+ +### Posts + +```sql +CREATE TABLE stackoverflow.posts +( + `Id` Int32 CODEC(Delta(4), ZSTD(1)), + `PostTypeId` Enum8('Question' = 1, 'Answer' = 2, 'Wiki' = 3, 'TagWikiExcerpt' = 4, 'TagWiki' = 5, 'ModeratorNomination' = 6, 'WikiPlaceholder' = 7, 'PrivilegeWiki' = 8), + `AcceptedAnswerId` UInt32, + `CreationDate` DateTime64(3, 'UTC'), + `Score` Int32, + `ViewCount` UInt32 CODEC(Delta(4), ZSTD(1)), + `Body` String, + `OwnerUserId` Int32, + `OwnerDisplayName` String, + `LastEditorUserId` Int32, + `LastEditorDisplayName` String, + `LastEditDate` DateTime64(3, 'UTC') CODEC(Delta(8), ZSTD(1)), + `LastActivityDate` DateTime64(3, 'UTC'), + `Title` String, + `Tags` String, + `AnswerCount` UInt16 CODEC(Delta(2), ZSTD(1)), + `CommentCount` UInt8, + `FavoriteCount` UInt8, + `ContentLicense` LowCardinality(String), + `ParentId` String, + `CommunityOwnedDate` DateTime64(3, 'UTC'), + `ClosedDate` DateTime64(3, 'UTC') +) +ENGINE = MergeTree +PARTITION BY toYear(CreationDate) +ORDER BY (PostTypeId, toDate(CreationDate), CreationDate) + +INSERT INTO stackoverflow.posts SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posts/*.parquet') + +0 rows in set. Elapsed: 265.466 sec. Processed 59.82 million rows, 38.07 GB (225.34 thousand rows/s., 143.42 MB/s.) +``` + +Posts are also available by year e.g. [https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posts/2020.parquet](https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posts/2020.parquet) + + +### Votes + +```sql +CREATE TABLE stackoverflow.votes +( + `Id` UInt32, + `PostId` Int32, + `VoteTypeId` UInt8, + `CreationDate` DateTime64(3, 'UTC'), + `UserId` Int32, + `BountyAmount` UInt8 +) +ENGINE = MergeTree +ORDER BY (VoteTypeId, CreationDate, PostId, UserId) + +INSERT INTO stackoverflow.votes SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/votes/*.parquet') + +0 rows in set. 
Elapsed: 21.605 sec. Processed 238.98 million rows, 2.13 GB (11.06 million rows/s., 98.46 MB/s.) +``` + +Votes are also available by year e.g. [https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/votes/2020.parquet](https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/votes/2020.parquet) + + +### Comments + +```sql +CREATE TABLE stackoverflow.comments +( + `Id` UInt32, + `PostId` UInt32, + `Score` UInt16, + `Text` String, + `CreationDate` DateTime64(3, 'UTC'), + `UserId` Int32, + `UserDisplayName` LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY CreationDate + +INSERT INTO stackoverflow.comments SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/comments/*.parquet') + +0 rows in set. Elapsed: 56.593 sec. Processed 90.38 million rows, 11.14 GB (1.60 million rows/s., 196.78 MB/s.) +``` + +Comments are also available by year e.g. [https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/comments/2020.parquet](https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/comments/2020.parquet) + +### Users + +```sql +CREATE TABLE stackoverflow.users +( + `Id` Int32, + `Reputation` LowCardinality(String), + `CreationDate` DateTime64(3, 'UTC') CODEC(Delta(8), ZSTD(1)), + `DisplayName` String, + `LastAccessDate` DateTime64(3, 'UTC'), + `AboutMe` String, + `Views` UInt32, + `UpVotes` UInt32, + `DownVotes` UInt32, + `WebsiteUrl` String, + `Location` LowCardinality(String), + `AccountId` Int32 +) +ENGINE = MergeTree +ORDER BY (Id, CreationDate) + +INSERT INTO stackoverflow.users SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/users.parquet') + +0 rows in set. Elapsed: 10.988 sec. Processed 22.48 million rows, 1.36 GB (2.05 million rows/s., 124.10 MB/s.) 
+``` + +### Badges + +```sql +CREATE TABLE stackoverflow.badges +( + `Id` UInt32, + `UserId` Int32, + `Name` LowCardinality(String), + `Date` DateTime64(3, 'UTC'), + `Class` Enum8('Gold' = 1, 'Silver' = 2, 'Bronze' = 3), + `TagBased` Bool +) +ENGINE = MergeTree +ORDER BY UserId + +INSERT INTO stackoverflow.badges SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/badges.parquet') + +0 rows in set. Elapsed: 6.635 sec. Processed 51.29 million rows, 797.05 MB (7.73 million rows/s., 120.13 MB/s.) +``` + +### `PostLinks` + +```sql +CREATE TABLE stackoverflow.postlinks +( + `Id` UInt64, + `CreationDate` DateTime64(3, 'UTC'), + `PostId` Int32, + `RelatedPostId` Int32, + `LinkTypeId` Enum8('Linked' = 1, 'Duplicate' = 3) +) +ENGINE = MergeTree +ORDER BY (PostId, RelatedPostId) + +INSERT INTO stackoverflow.postlinks SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/postlinks.parquet') + +0 rows in set. Elapsed: 1.534 sec. Processed 6.55 million rows, 129.70 MB (4.27 million rows/s., 84.57 MB/s.) +``` + +### `PostHistory` + +```sql +CREATE TABLE stackoverflow.posthistory +( + `Id` UInt64, + `PostHistoryTypeId` UInt8, + `PostId` Int32, + `RevisionGUID` String, + `CreationDate` DateTime64(3, 'UTC'), + `UserId` Int32, + `Text` String, + `ContentLicense` LowCardinality(String), + `Comment` String, + `UserDisplayName` String +) +ENGINE = MergeTree +ORDER BY (CreationDate, PostId) + +INSERT INTO stackoverflow.posthistory SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posthistory/*.parquet') + +0 rows in set. Elapsed: 422.795 sec. Processed 160.79 million rows, 67.08 GB (380.30 thousand rows/s., 158.67 MB/s.) +``` + +## Original dataset + +The original dataset is available in compressed (7zip) XML format at [https://archive.org/download/stackexchange](https://archive.org/download/stackexchange) - files with prefix `stackoverflow.com*`. 
+ +### Download + +```bash +wget https://archive.org/download/stackexchange/stackoverflow.com-Badges.7z +wget https://archive.org/download/stackexchange/stackoverflow.com-Comments.7z +wget https://archive.org/download/stackexchange/stackoverflow.com-PostHistory.7z +wget https://archive.org/download/stackexchange/stackoverflow.com-PostLinks.7z +wget https://archive.org/download/stackexchange/stackoverflow.com-Posts.7z +wget https://archive.org/download/stackexchange/stackoverflow.com-Users.7z +wget https://archive.org/download/stackexchange/stackoverflow.com-Votes.7z +``` + +These files are up to 35GB and can take around 30 mins to download depending on internet connection - the download server throttles at around 20MB/sec. + +### Convert to JSON + +At the time of writing, ClickHouse does not have native support for XML as an input format. To load the data into ClickHouse we first convert to NDJSON. + +To convert XML to JSON we recommend the [`xq`](https://github.com/kislyuk/yq) Linux tool, a simple `jq` wrapper for XML documents. + +Install xq and jq: + +```bash +sudo apt install jq +pip install yq +``` + +The following steps apply to any of the above files. We use the `stackoverflow.com-Posts.7z` file as an example. Modify as required. + +Extract the file using [p7zip](https://p7zip.sourceforge.net/). This will produce a single xml file - in this case `Posts.xml`. + +> Files are compressed approximately 4.5x. At 22GB compressed, the posts file requires around 97G uncompressed. + +```bash +p7zip -d stackoverflow.com-Posts.7z +``` + +The following splits the xml file into files, each containing 10000 rows. + +```bash +mkdir posts +cd posts +# the following splits the input xml file into sub files of 10000 rows +tail -n +3 ../Posts.xml | head -n -1 | split -l 10000 --filter='{ printf "<rows>\n"; cat - ; printf "</rows>\n"; } > $FILE' - +``` + +After running the above, users will have a set of files, each with 10000 lines. 
This ensures the memory overhead of the next command is not excessive (xml to JSON conversion is done in memory). + +```bash +find . -maxdepth 1 -type f -exec xq -c '.rows.row[]' {} \; | sed -e 's:"@:":g' > posts.json +``` + +The above command will produce a single `posts.json` file. + +Load into ClickHouse with the following command. Note the schema is specified for the `posts.json` file. This will need to be adjusted per data type to align with the target table. + +```bash +clickhouse local --query "SELECT * FROM file('posts.json', JSONEachRow, 'Id Int32, PostTypeId UInt8, AcceptedAnswerId UInt32, CreationDate DateTime64(3, \'UTC\'), Score Int32, ViewCount UInt32, Body String, OwnerUserId Int32, OwnerDisplayName String, LastEditorUserId Int32, LastEditorDisplayName String, LastEditDate DateTime64(3, \'UTC\'), LastActivityDate DateTime64(3, \'UTC\'), Title String, Tags String, AnswerCount UInt16, CommentCount UInt8, FavoriteCount UInt8, ContentLicense String, ParentId String, CommunityOwnedDate DateTime64(3, \'UTC\'), ClosedDate DateTime64(3, \'UTC\')') FORMAT Native" | clickhouse client --host <host> --secure --password <password> --query "INSERT INTO stackoverflow.posts_v2 FORMAT Native" +``` + +## Example queries + +A few simple questions to get you started. + +### Most popular tags on Stack Overflow + +```sql + +SELECT + arrayJoin(arrayFilter(t -> (t != ''), splitByChar('|', Tags))) AS Tags, + count() AS c +FROM stackoverflow.posts +GROUP BY Tags +ORDER BY c DESC +LIMIT 10 + +┌─Tags───────┬───────c─┐ +│ javascript │ 2527130 │ +│ python │ 2189638 │ +│ java │ 1916156 │ +│ c# │ 1614236 │ +│ php │ 1463901 │ +│ android │ 1416442 │ +│ html │ 1186567 │ +│ jquery │ 1034621 │ +│ c++ │ 806202 │ +│ css │ 803755 │ +└────────────┴─────────┘ + +10 rows in set. Elapsed: 1.013 sec. Processed 59.82 million rows, 1.21 GB (59.07 million rows/s., 1.19 GB/s.) +Peak memory usage: 224.03 MiB. +``` + +### User with the most answers (active accounts) + +Account requires a `UserId`. 
+ +```sql +SELECT + any(OwnerUserId) UserId, + OwnerDisplayName, + count() AS c +FROM stackoverflow.posts WHERE OwnerDisplayName != '' AND PostTypeId='Answer' AND OwnerUserId != 0 +GROUP BY OwnerDisplayName +ORDER BY c DESC +LIMIT 5 + +┌─UserId─┬─OwnerDisplayName─┬────c─┐ +│ 22656 │ Jon Skeet │ 2727 │ +│ 23354 │ Marc Gravell │ 2150 │ +│ 12950 │ tvanfosson │ 1530 │ +│ 3043 │ Joel Coehoorn │ 1438 │ +│ 10661 │ S.Lott │ 1087 │ +└────────┴──────────────────┴──────┘ + +5 rows in set. Elapsed: 0.154 sec. Processed 35.83 million rows, 193.39 MB (232.33 million rows/s., 1.25 GB/s.) +Peak memory usage: 206.45 MiB. +``` + +### ClickHouse related posts with the most views + +```sql +SELECT + Id, + Title, + ViewCount, + AnswerCount +FROM stackoverflow.posts +WHERE Title ILIKE '%ClickHouse%' +ORDER BY ViewCount DESC +LIMIT 10 + +┌───────Id─┬─Title────────────────────────────────────────────────────────────────────────────┬─ViewCount─┬─AnswerCount─┐ +│ 52355143 │ Is it possible to delete old records from clickhouse table? │ 41462 │ 3 │ +│ 37954203 │ Clickhouse Data Import │ 38735 │ 3 │ +│ 37901642 │ Updating data in Clickhouse │ 36236 │ 6 │ +│ 58422110 │ Pandas: How to insert dataframe into Clickhouse │ 29731 │ 4 │ +│ 63621318 │ DBeaver - Clickhouse - SQL Error [159] .. Read timed out │ 27350 │ 1 │ +│ 47591813 │ How to filter clickhouse table by array column contents? │ 27078 │ 2 │ +│ 58728436 │ How to search the string in query with case insensitive on Clickhouse database? │ 26567 │ 3 │ +│ 65316905 │ Clickhouse: DB::Exception: Memory limit (for query) exceeded │ 24899 │ 2 │ +│ 49944865 │ How to add a column in clickhouse │ 24424 │ 1 │ +│ 59712399 │ How to cast date Strings to DateTime format with extended parsing in ClickHouse? │ 22620 │ 1 │ +└──────────┴──────────────────────────────────────────────────────────────────────────────────┴───────────┴─────────────┘ + +10 rows in set. Elapsed: 0.472 sec. Processed 59.82 million rows, 1.91 GB (126.63 million rows/s., 4.03 GB/s.) 
+Peak memory usage: 240.01 MiB. +``` + +### Most controversial posts + +```sql +SELECT + Id, + Title, + UpVotes, + DownVotes, + abs(UpVotes - DownVotes) AS Controversial_ratio +FROM stackoverflow.posts +INNER JOIN +( + SELECT + PostId, + countIf(VoteTypeId = 2) AS UpVotes, + countIf(VoteTypeId = 3) AS DownVotes + FROM stackoverflow.votes + GROUP BY PostId + HAVING (UpVotes > 10) AND (DownVotes > 10) +) AS votes ON posts.Id = votes.PostId +WHERE Title != '' +ORDER BY Controversial_ratio ASC +LIMIT 3 + +┌───────Id─┬─Title─────────────────────────────────────────────┬─UpVotes─┬─DownVotes─┬─Controversial_ratio─┐ +│ 583177 │ VB.NET Infinite For Loop │ 12 │ 12 │ 0 │ +│ 9756797 │ Read console input as enumerable - one statement? │ 16 │ 16 │ 0 │ +│ 13329132 │ What's the point of ARGV in Ruby? │ 22 │ 22 │ 0 │ +└──────────┴───────────────────────────────────────────────────┴─────────┴───────────┴─────────────────────┘ + +3 rows in set. Elapsed: 4.779 sec. Processed 298.80 million rows, 3.16 GB (62.52 million rows/s., 661.05 MB/s.) +Peak memory usage: 6.05 GiB. +``` + +## Attribution + +We thank Stack Overflow for providing this data under the `cc-by-sa 4.0` license, acknowledging their efforts and the original source of the data at [https://archive.org/details/stackexchange](https://archive.org/details/stackexchange). diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 67752f223ce..98e73dec451 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -314,7 +314,7 @@ For example, to download a aarch64 binary for ClickHouse v23.4, follow these ste - Find the GitHub pull request for release v23.4: [Release pull request for branch 23.4](https://github.com/ClickHouse/ClickHouse/pull/49238) - Click "Commits", then click a commit similar to "Update autogenerated version to 23.4.2.1 and contributors" for the particular version you like to install. 
- Click the green check / yellow dot / red cross to open the list of CI checks. -- Click "Details" next to "ClickHouse Build Check" in the list, it will open a page similar to [this page](https://s3.amazonaws.com/clickhouse-test-reports/46793/b460eb70bf29b19eadd19a1f959b15d186705394/clickhouse_build_check/report.html) +- Click "Details" next to "Builds" in the list, it will open a page similar to [this page](https://s3.amazonaws.com/clickhouse-test-reports/46793/b460eb70bf29b19eadd19a1f959b15d186705394/clickhouse_build_check/report.html) - Find the rows with compiler = "clang-*-aarch64" - there are multiple rows. - Download the artifacts for these builds. diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 1eb426af617..e18ff6f1a3f 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -193,6 +193,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va - `--hardware-utilization` — Print hardware utilization information in progress bar. - `--print-profile-events` – Print `ProfileEvents` packets. - `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet). +- `--jwt` – If specified, enables authorization via JSON Web Token. Server JWT authorization is available only in ClickHouse Cloud. Instead of `--host`, `--port`, `--user` and `--password` options, ClickHouse client also supports connection strings (see next section). diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 66d5bd2e574..a81a17e65d6 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -480,7 +480,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`. 
- [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. -- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. +- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. - [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`. - [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`. - [input_format_csv_try_infer_numbers_from_strings](/docs/en/operations/settings/settings-formats.md/#input_format_csv_try_infer_numbers_from_strings) - Try to infer numbers from string fields while schema inference. Default value - `false`. @@ -1490,6 +1490,8 @@ Differs from [PrettySpaceNoEscapes](#prettyspacenoescapes) in that up to 10,000 - [output_format_pretty_color](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_color) - use ANSI escape sequences to paint colors in Pretty formats. 
Default value - `true`. - [output_format_pretty_grid_charset](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_grid_charset) - Charset for printing grid borders. Available charsets: ASCII, UTF-8. Default value - `UTF-8`. - [output_format_pretty_row_numbers](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `true`. +- [output_format_pretty_display_footer_column_names](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_display_footer_column_names) - Display column names in the footer if table contains many rows. Default value - `true`. +- [output_format_pretty_display_footer_column_names_min_rows](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_display_footer_column_names_min_rows) - Sets the minimum number of rows for which a footer will be displayed if [output_format_pretty_display_footer_column_names](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_display_footer_column_names) is enabled. Default value - 50. ## RowBinary {#rowbinary} @@ -2165,6 +2167,9 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t - [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`. - [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`. - [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `lz4`. 
+- [input_format_parquet_max_block_size](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_max_block_size) - Max block row size for parquet reader. Default value - `65409`. +- [input_format_parquet_prefer_block_bytes](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_prefer_block_bytes) - Average block bytes output by parquet reader. Default value - `16744704`. +- [output_format_parquet_write_page_index](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_write_page_index) - Write a page index into Parquet files. Currently requires `output_format_parquet_use_custom_encoder` to be disabled. Default value - `true`. ## ParquetMetadata {data-format-parquet-metadata} diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index eb1a3ba1dbc..f5b6326fa96 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -508,7 +508,7 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`: - `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request. -- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`. +- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `http_response_headers`, `response_content`, `query`, `query_param_name`. `type` currently supports three types: [predefined_query_handler](#predefined_query_handler), [dynamic_query_handler](#dynamic_query_handler), [static](#static). - `query` — use with `predefined_query_handler` type, executes query when the handler is called. 
@@ -519,6 +519,8 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`: - `content_type` — use with any type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type). + - `http_response_headers` — use with any type, response headers map. Can be used to set the content type as well. + - `response_content` — use with `static` type, response content sent to client, when using the prefix ‘file://’ or ‘config://’, find the content from the file or configuration sends to client. Next are the configuration methods for different `type`. @@ -616,6 +618,33 @@ Return a message. static 402 text/html; charset=UTF-8 + + en + 43 + + Say Hi! + + + + +``` + +`http_response_headers` can be used to set the content type instead of `content_type`. + +``` xml + + + GET + xxx + /hi + + static + 402 + + text/html; charset=UTF-8 + en + 43 + Say Hi! @@ -696,6 +725,9 @@ Find the content from the file send to client. static text/html; charset=UTF-8 + + 737060cd8c284d8af7ad3082f209582d + file:///absolute_path_file.html @@ -706,6 +738,9 @@ Find the content from the file send to client. static text/html; charset=UTF-8 + + 737060cd8c284d8af7ad3082f209582d + file://./relative_path_file.html diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index ce5ab24ecb0..42820505406 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -31,6 +31,56 @@ Alternatively, in order to enable the MySQL interface for an existing service: 3. After entering the password, you will get prompted the MySQL connection string for this service ![Connection screen - MySQL Enabled](./images/mysql5.png) +## Creating multiple MySQL users in ClickHouse Cloud + +By default, there is a built-in `mysql4<subdomain>` user, which uses the same password as the `default` one. The `<subdomain>` part is the first segment of your ClickHouse Cloud hostname.
This format is necessary to work with the tools that implement secure connection, but don't provide [SNI information in their TLS handshake](https://www.cloudflare.com/learning/ssl/what-is-sni), which makes it impossible to do the internal routing without an extra hint in the username (MySQL console client is one of such tools). + +Because of this, we _highly recommend_ following the `mysql4<subdomain>_<username>` format when creating a new user intended to be used with the MySQL interface, where `<subdomain>` is a hint to identify your Cloud service, and `<username>` is an arbitrary suffix of your choice. + +:::tip +For a ClickHouse Cloud hostname like `foobar.us-east1.aws.clickhouse.cloud`, the `<subdomain>` part equals `foobar`, and a custom MySQL username could look like `mysql4foobar_team1`. +::: + +You can create extra users to use with the MySQL interface if, for example, you need to apply extra settings. + +1. Optional - create a [settings profile](https://clickhouse.com/docs/en/sql-reference/statements/create/settings-profile) to apply for your custom user. For example, `my_custom_profile` with an extra setting which will be applied by default when we connect with the user we create later: + + ```sql + CREATE SETTINGS PROFILE my_custom_profile SETTINGS prefer_column_name_to_alias=1; + ``` + + `prefer_column_name_to_alias` is used just as an example, you can use other settings there. +2. [Create a user](https://clickhouse.com/docs/en/sql-reference/statements/create/user) using the following format: `mysql4<subdomain>_<username>` ([see above](#creating-multiple-mysql-users-in-clickhouse-cloud)). The password must be in double SHA1 format.
For example: + + ```sql + CREATE USER mysql4foobar_team1 IDENTIFIED WITH double_sha1_password BY 'YourPassword42$'; + ``` + + or if you want to use a custom profile for this user: + + ```sql + CREATE USER mysql4foobar_team1 IDENTIFIED WITH double_sha1_password BY 'YourPassword42$' SETTINGS PROFILE 'my_custom_profile'; + ``` + + where `my_custom_profile` is the name of the profile you created earlier. +3. [Grant](https://clickhouse.com/docs/en/sql-reference/statements/grant) the new user the necessary permissions to interact with the desired tables or databases. For example, if you want to grant access to `system.query_log` only: + + ```sql + GRANT SELECT ON system.query_log TO mysql4foobar_team1; + ``` + +4. Use the created user to connect to your ClickHouse Cloud service with the MySQL interface. + +### Troubleshooting multiple MySQL users in ClickHouse Cloud + +After creating a new MySQL user, you may see the following error while connecting via the MySQL CLI client: + +``` +ERROR 2013 (HY000): Lost connection to MySQL server at 'reading authorization packet', system error: 54 +``` + +In this case, ensure that the username follows the `mysql4<subdomain>_<username>` format, as described [above](#creating-multiple-mysql-users-in-clickhouse-cloud). + ## Enabling the MySQL Interface On Self-managed ClickHouse Add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file.
For example, you could define the port in a new XML file in your `config.d/` [folder](../operations/configuration-files): diff --git a/docs/en/operations/allocation-profiling.md b/docs/en/operations/allocation-profiling.md index 64b4106a7e1..574e1ae2ff3 100644 --- a/docs/en/operations/allocation-profiling.md +++ b/docs/en/operations/allocation-profiling.md @@ -59,10 +59,10 @@ For that, we need to use `jemalloc`'s tool called [jeprof](https://github.com/je If that’s the case, we recommend installing an [alternative implementation](https://github.com/gimli-rs/addr2line) of the tool. ``` -git clone https://github.com/gimli-rs/addr2line +git clone https://github.com/gimli-rs/addr2line.git --depth=1 --branch=0.23.0 cd addr2line -cargo b --examples -r -cp ./target/release/examples/addr2line path/to/current/addr2line +cargo build --features bin --release +cp ./target/release/addr2line path/to/current/addr2line ``` ::: diff --git a/docs/en/operations/analyzer.md b/docs/en/operations/analyzer.md new file mode 100644 index 00000000000..298c6dacd06 --- /dev/null +++ b/docs/en/operations/analyzer.md @@ -0,0 +1,194 @@ +--- +slug: /en/operations/analyzer +sidebar_label: Analyzer +title: Analyzer +description: Details about ClickHouse's query analyzer +keywords: [analyzer] +--- + +# Analyzer + + + +## Known incompatibilities + +In ClickHouse version `24.3`, the new query analyzer was enabled by default. +Despite fixing a large number of bugs and introducing new optimizations, it also introduces some breaking changes in ClickHouse behaviour. Please read the following changes to determine how to rewrite your queries for the new analyzer. + +### Invalid queries are no longer optimized + +The previous query planning infrastructure applied AST-level optimizations before the query validation step. +Optimizations could rewrite the initial query so it becomes valid and can be executed. + +In the new analyzer, query validation takes place before the optimization step. 
+This means that invalid queries that were possible to execute before are now unsupported. +In such cases, the query must be fixed manually. + +**Example 1:** + +```sql +SELECT number +FROM numbers(1) +GROUP BY toString(number) +``` + +The query above uses column `number` in the projection list when only `toString(number)` is available after the aggregation. +In the old analyzer, `GROUP BY toString(number)` was optimized into `GROUP BY number`, making the query valid. + +**Example 2:** + +```sql +SELECT + number % 2 AS n, + sum(number) +FROM numbers(10) +GROUP BY n +HAVING number > 5 +``` + +The same problem occurs in this query: column `number` is used after aggregation with another key. +The previous query analyzer fixed this query by moving the `number > 5` filter from the `HAVING` clause to the `WHERE` clause. + +To fix the query, you should move all conditions that apply to non-aggregated columns to the `WHERE` section to conform to standard SQL syntax: +```sql +SELECT + number % 2 AS n, + sum(number) +FROM numbers(10) +WHERE number > 5 +GROUP BY n +``` + +### CREATE VIEW with invalid query + +The new analyzer always performs type-checking. +Previously, it was possible to create a `VIEW` with an invalid `SELECT` query. It would then fail during the first `SELECT` or `INSERT` (in the case of `MATERIALIZED VIEW`). + +Now, it's not possible to create such `VIEW`s anymore. + +**Example:** + +```sql +CREATE TABLE source (data String) ENGINE=MergeTree ORDER BY tuple(); + +CREATE VIEW some_view +AS SELECT JSONExtract(data, 'test', 'DateTime64(3)') +FROM source; +``` + +### Known incompatibilities of the `JOIN` clause + +#### Join using column from projection + +An alias from the `SELECT` list cannot be used as a `JOIN USING` key by default.
+ +A new setting, `analyzer_compatibility_join_using_top_level_identifier`, when enabled, alters the behavior of `JOIN USING` to prefer to resolve identifiers based on expressions from the projection list of the `SELECT` query, rather than using the columns from left table directly. + +**Example:** + +```sql +SELECT a + 1 AS b, t2.s +FROM Values('a UInt64, b UInt64', (1, 1)) AS t1 +JOIN Values('b UInt64, s String', (1, 'one'), (2, 'two')) t2 +USING (b); +``` + +With `analyzer_compatibility_join_using_top_level_identifier` set to `true`, the join condition is interpreted as `t1.a + 1 = t2.b`, matching the behavior of earlier versions. So, the result will be `2, 'two'`. +When the setting is `false`, the join condition defaults to `t1.b = t2.b`, and the query will return `2, 'one'`. +If `b` is not present in `t1`, the query will fail with an error. + +#### Changes in behavior with `JOIN USING` and `ALIAS`/`MATERIALIZED` columns + +In the new analyzer, using `*` in a `JOIN USING` query that involves `ALIAS` or `MATERIALIZED` columns will include those columns in the result set by default. + +**Example:** + +```sql +CREATE TABLE t1 (id UInt64, payload ALIAS sipHash64(id)) ENGINE = MergeTree ORDER BY id; +INSERT INTO t1 VALUES (1), (2); + +CREATE TABLE t2 (id UInt64, payload ALIAS sipHash64(id)) ENGINE = MergeTree ORDER BY id; +INSERT INTO t2 VALUES (2), (3); + +SELECT * FROM t1 +FULL JOIN t2 USING (payload); +``` + +In the new analyzer, the result of this query will include the `payload` column along with `id` from both tables. In contrast, the previous analyzer would only include these `ALIAS` columns if specific settings (`asterisk_include_alias_columns` or `asterisk_include_materialized_columns`) were enabled, and the columns might appear in a different order. + +To ensure consistent and expected results, especially when migrating old queries to the new analyzer, it is advisable to specify columns explicitly in the `SELECT` clause rather than using `*`. 
+ +#### Handling of Type Modifiers for columns in `USING` Clause + +In the new version of the analyzer, the rules for determining the common supertype for columns specified in the `USING` clause have been standardized to produce more predictable outcomes, especially when dealing with type modifiers like `LowCardinality` and `Nullable`. + +- `LowCardinality(T)` and `T`: When a column of type `LowCardinality(T)` is joined with a column of type `T`, the resulting common supertype will be `T`, effectively discarding the `LowCardinality` modifier. + +- `Nullable(T)` and `T`: When a column of type `Nullable(T)` is joined with a column of type `T`, the resulting common supertype will be `Nullable(T)`, ensuring that the nullable property is preserved. + +**Example:** + +```sql +SELECT id, toTypeName(id) FROM Values('id LowCardinality(String)', ('a')) AS t1 +FULL OUTER JOIN Values('id String', ('b')) AS t2 +USING (id); +``` + +In this query, the common supertype for `id` is determined as `String`, discarding the `LowCardinality` modifier from `t1`. + +### Projection column names changes + +During projection names computation, aliases are not substituted. + +```sql +SELECT + 1 + 1 AS x, + x + 1 +SETTINGS allow_experimental_analyzer = 0 +FORMAT PrettyCompact + + ┌─x─┬─plus(plus(1, 1), 1)─┐ +1. │ 2 │ 3 │ + └───┴─────────────────────┘ + +SELECT + 1 + 1 AS x, + x + 1 +SETTINGS allow_experimental_analyzer = 1 +FORMAT PrettyCompact + + ┌─x─┬─plus(x, 1)─┐ +1. │ 2 │ 3 │ + └───┴────────────┘ +``` + +### Incompatible function arguments types + +In the new analyzer, type inference happens during initial query analysis. +This change means that type checks are done before short-circuit evaluation; thus, `if` function arguments must always have a common supertype. 
+ +**Example:** + +The following query fails with `There is no supertype for types Array(UInt8), String because some of them are Array and some of them are not`: + +```sql +SELECT toTypeName(if(0, [2, 3, 4], 'String')) +``` + +### Heterogeneous clusters + +The new analyzer significantly changed the communication protocol between servers in the cluster. Thus, it's impossible to run distributed queries on servers with different `allow_experimental_analyzer` setting values. + +### Mutations are interpreted by previous analyzer + +Mutations are still using the old analyzer. +This means some new ClickHouse SQL features can't be used in mutations. For example, the `QUALIFY` clause. +Status can be checked [here](https://github.com/ClickHouse/ClickHouse/issues/61563). + +### Unsupported features + +The list of features new analyzer currently doesn't support: + +- Annoy index. +- Hypothesis index. Work in progress [here](https://github.com/ClickHouse/ClickHouse/pull/48381). +- Window view is not supported. There are no plans to support it in the future. diff --git a/docs/en/operations/external-authenticators/ssl-x509.md b/docs/en/operations/external-authenticators/ssl-x509.md index 109913c2b18..09fac45d7ae 100644 --- a/docs/en/operations/external-authenticators/ssl-x509.md +++ b/docs/en/operations/external-authenticators/ssl-x509.md @@ -6,23 +6,30 @@ import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.m -[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` field of the certificate is used to identify connected user. This allows to associate multiple certificates with the same user. 
Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration. +[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` or `subjectAltName extension` field of the certificate is used to identify the connected user. This allows to associate multiple certificates with the same user. Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration. -To enable SSL certificate authentication, a list of `Common Name`'s for each ClickHouse user must be specified in the settings file `users.xml `: +To enable SSL certificate authentication, a list of `Common Name`'s or `Subject Alt Name`'s for each ClickHouse user must be specified in the settings file `users.xml `: **Example** ```xml - + host.domain.com:example_user host.domain.com:example_user_dev - + + + + DNS:host.domain.com + + + + ``` diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index c9d94dd95ee..59ee05d1f9e 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -5,6 +5,10 @@ sidebar_label: "Named collections" title: "Named collections" --- +import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge'; + + + Named collections provide a way to store collections of key-value pairs to be used to configure integrations with external sources. You can use named collections with dictionaries, tables, table functions, and object storage. 
@@ -67,6 +71,23 @@ To manage named collections with DDL a user must have the `named_control_collect In the above example the `password_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. This configuration for the user `default` has the attribute `replace=true` as in the default configuration has a plain text `password` set, and it is not possible to have both plain text and sha256 hex passwords set for a user. ::: +### Storage for named collections + +Named collections can be stored either on local disk or in ZooKeeper/Keeper. By default, local storage is used. + +To configure named collections storage in Keeper, add a `type` (equal to either `keeper` or `zookeeper`) and a `path` (the path in Keeper where named collections will be stored) to the `named_collections_storage` section in the configuration file: +``` + + + zookeeper + /named_collections_path/ + 1000 + + +``` + +The optional configuration parameter `update_timeout_ms` defaults to `5000`. + ## Storing named collections in configuration files ### XML example @@ -443,3 +464,59 @@ SELECT dictGet('dict', 'b', 1); │ a │ └─────────────────────────┘ ``` + +## Named collections for accessing Kafka + +For the description of parameters, see [Kafka](../engines/table-engines/integrations/kafka.md).
+ +### DDL example + +```sql +CREATE NAMED COLLECTION my_kafka_cluster AS +kafka_broker_list = 'localhost:9092', +kafka_topic_list = 'kafka_topic', +kafka_group_name = 'consumer_group', +kafka_format = 'JSONEachRow', +kafka_max_block_size = '1048576'; + +``` +### XML example + +```xml + + + + localhost:9092 + kafka_topic + consumer_group + JSONEachRow + 1048576 + + + +``` + +### Example of using named collections with a Kafka table + +Both of the following examples use the same named collection `my_kafka_cluster`: + + +```sql +CREATE TABLE queue +( + timestamp UInt64, + level String, + message String +) +ENGINE = Kafka(my_kafka_cluster) + +CREATE TABLE queue +( + timestamp UInt64, + level String, + message String +) +ENGINE = Kafka(my_kafka_cluster) +SETTINGS kafka_num_consumers = 4, + kafka_thread_per_consumer = 1; +``` diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index a5fe74fd0c6..8278f8c8699 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -498,6 +498,8 @@ Default: 0.9 Interval in seconds during which the server's maximum allowed memory consumption is adjusted by the corresponding threshold in cgroups. (see settings `cgroup_memory_watcher_hard_limit_ratio` and `cgroup_memory_watcher_soft_limit_ratio`). +To disable the cgroup observer, set this value to `0`. + Type: UInt64 Default: 15 @@ -591,6 +593,22 @@ Default value: 100000 400 ``` +## max\_table\_num\_to\_throw {#max-table-num-to-throw} +If number of tables is greater than this value, server will throw an exception. 0 means no limitation. View, remote tables, dictionary, system tables are not counted. 
Only tables in Atomic/Ordinary/Replicated/Lazy database engines are counted. Default value: 0 + +**Example** +```xml +400 +``` + +## max\_database\_num\_to\_throw {#max-database-num-to-throw} +If the number of databases is greater than this value, the server will throw an exception. 0 means no limitation. +Default value: 0 + +**Example** +```xml +400 +``` ## max_temporary_data_on_disk_size @@ -938,6 +956,38 @@ Or it can be set in hex: Everything mentioned above can be applied for `aes_256_gcm_siv` (but the key must be 32 bytes long). +## error_log {#error_log} + +It is disabled by default. + +**Enabling** + +To manually turn on error history collection [`system.error_log`](../../operations/system-tables/error_log.md), create `/etc/clickhouse-server/config.d/error_log.xml` with the following content: + +``` xml + + + system + error_log
+ 7500 + 1000 + 1048576 + 8192 + 524288 + false +
+
+``` + +**Disabling** + +To disable `error_log` setting, you should create the following file `/etc/clickhouse-server/config.d/disable_error_log.xml` with the following content: + +``` xml + + + +``` ## custom_settings_prefixes {#custom_settings_prefixes} @@ -1206,6 +1256,16 @@ Expired time for HSTS in seconds. The default value is 0 means clickhouse disabl 600000 ``` +## mlock_executable {#mlock_executable} + +Perform mlockall after startup to lower first queries latency and to prevent clickhouse executable from being paged out under high IO load. Enabling this option is recommended but will lead to increased startup time for up to a few seconds. +Keep in mind that this parameter would not work without "CAP_IPC_LOCK" capability. +**Example** + +``` xml +false +``` + ## include_from {#include_from} The path to the file with substitutions. Both XML and YAML formats are supported. @@ -1353,6 +1413,26 @@ Examples: 127.0.0.1 ``` +## listen_try {#listen_try} + +The server will not exit if IPv6 or IPv4 networks are unavailable while trying to listen. + +Examples: + +``` xml +0 +``` + +## listen_reuse_port {#listen_reuse_port} + +Allow multiple servers to listen on the same address:port. Requests will be routed to a random server by the operating system. Enabling this setting is not recommended. + +Examples: + +``` xml +0 +``` + ## listen_backlog {#listen_backlog} Backlog (queue size of pending connections) of the listen socket. @@ -1385,6 +1465,9 @@ Keys: - `size` – Size of the file. Applies to `log` and `errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place. - `count` – The number of archived log files that ClickHouse stores. - `console` – Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`. +- `console_log_level` – Logging level for console. Default to `level`. +- `use_syslog` - Log to syslog as well. +- `syslog_level` - Logging level for logging to syslog. 
- `stream_compress` – Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`. - `formatting` – Specify log format to be printed in console log (currently only `json` supported). @@ -1871,7 +1954,7 @@ For more information, see the MergeTreeSettings.h header file. ## metric_log {#metric_log} -It is enabled by default. If it`s not, you can do this manually. +It is disabled by default. **Enabling** @@ -2894,6 +2977,8 @@ Define proxy servers for HTTP and HTTPS requests, currently supported by S3 stor There are three ways to define proxy servers: environment variables, proxy lists, and remote proxy resolvers. +Bypassing proxy servers for specific hosts is also supported with the use of `no_proxy`. + ### Environment variables The `http_proxy` and `https_proxy` environment variables allow you to specify a @@ -3003,6 +3088,29 @@ This also allows a mix of resolver types can be used. By default, tunneling (i.e, `HTTP CONNECT`) is used to make `HTTPS` requests over `HTTP` proxy. This setting can be used to disable it. +### no_proxy +By default, all requests will go through the proxy. In order to disable it for specific hosts, the `no_proxy` variable must be set. +It can be set inside the `<proxy>` clause for list and remote resolvers and as an environment variable for the environment resolver. +It supports IP addresses, domains, subdomains and the `'*'` wildcard for full bypass. Leading dots are stripped, just like curl does. + +Example: + +The configuration below bypasses proxy requests to `clickhouse.cloud` and all of its subdomains (e.g., `auth.clickhouse.cloud`). +The same applies to GitLab, even though it has a leading dot. Both `gitlab.com` and `about.gitlab.com` would bypass the proxy. + +``` xml + + clickhouse.cloud,.gitlab.com + + http://proxy1 + http://proxy2:3128 + + + http://proxy1:3128 + + +``` + ## max_materialized_views_count_for_table {#max_materialized_views_count_for_table} A limit on the number of materialized views attached to a table.
@@ -3029,3 +3137,21 @@ This setting is only necessary for the migration period and will become obsolete Type: Bool Default: 1 + +## merge_workload {#merge_workload} + +Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for all background merges. Can be overridden by a merge tree setting. + +Default value: "default" + +**See Also** +- [Workload Scheduling](/docs/en/operations/workload-scheduling.md) + +## mutation_workload {#mutation_workload} + +Used to regulate how resources are utilized and shared between mutations and other workloads. Specified value is used as `workload` setting value for all background mutations. Can be overridden by a merge tree setting. + +Default value: "default" + +**See Also** +- [Workload Scheduling](/docs/en/operations/workload-scheduling.md) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index c3f303dcd38..9879ee35612 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -3,9 +3,126 @@ slug: /en/operations/settings/merge-tree-settings title: "MergeTree tables settings" --- -The values of `merge_tree` settings (for all MergeTree tables) can be viewed in the table `system.merge_tree_settings`, they can be overridden in `config.xml` in the `merge_tree` section, or set in the `SETTINGS` section of each table. +System table `system.merge_tree_settings` shows the globally set MergeTree settings. -These are example overrides for `max_suspicious_broken_parts`: +MergeTree settings can be set in the `merge_tree` section of the server config file, or specified for each `MergeTree` table individually in +the `SETTINGS` clause of the `CREATE TABLE` statement. 
+ +Example for customizing setting `max_suspicious_broken_parts`: + +Configure the default for all `MergeTree` tables in the server configuration file: + +``` text + + 5 + +``` + +Set for a particular table: + +``` sql +CREATE TABLE tab +( + `A` Int64 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS max_suspicious_broken_parts = 500; +``` + +Change the settings for a particular table using `ALTER TABLE ... MODIFY SETTING`: + +```sql +ALTER TABLE tab MODIFY SETTING max_suspicious_broken_parts = 100; + +-- reset to global default (value from system.merge_tree_settings) +ALTER TABLE tab RESET SETTING max_suspicious_broken_parts; +``` + +## index_granularity + +Maximum number of data rows between the marks of an index. + +Default value: 8192. + +## index_granularity_bytes + +Maximum size of data granules in bytes. + +Default value: 10Mb. + +To restrict the granule size only by number of rows, set to 0 (not recommended). + +## min_index_granularity_bytes + +Min allowed size of data granules in bytes. + +Default value: 1024b. + +To provide a safeguard against accidentally creating tables with very low index_granularity_bytes. + +## enable_mixed_granularity_parts + +Enables or disables transitioning to control the granule size with the `index_granularity_bytes` setting. Before version 19.11, there was only the `index_granularity` setting for restricting granule size. The `index_granularity_bytes` setting improves ClickHouse performance when selecting data from tables with big rows (tens and hundreds of megabytes). If you have tables with big rows, you can enable this setting for the tables to improve the efficiency of `SELECT` queries. + +## use_minimalistic_part_header_in_zookeeper + +Storage method of the data parts headers in ZooKeeper. If enabled, ZooKeeper stores less data. For details, see [here](../server-configuration-parameters/settings.md/#server-settings-use_minimalistic_part_header_in_zookeeper). 
+ +## min_merge_bytes_to_use_direct_io + +The minimum data volume for merge operation that is required for using direct I/O access to the storage disk. +When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. +If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse reads and writes the data to the storage disk using the direct I/O interface (`O_DIRECT` option). +If `min_merge_bytes_to_use_direct_io = 0`, then direct I/O is disabled. + +Default value: `10 * 1024 * 1024 * 1024` bytes. + +## merge_with_ttl_timeout + +Minimum delay in seconds before repeating a merge with delete TTL. + +Default value: `14400` seconds (4 hours). + +## merge_with_recompression_ttl_timeout + +Minimum delay in seconds before repeating a merge with recompression TTL. + +Default value: `14400` seconds (4 hours). + +## write_final_mark + +Enables or disables writing the final index mark at the end of data part (after the last byte). + +Default value: 1. + +Don’t change or bad things will happen. + +## storage_policy + +Storage policy. + +## min_bytes_for_wide_part + +Minimum number of bytes/rows in a data part that can be stored in `Wide` format. +You can set one, both or none of these settings. + +## max_compress_block_size + +Maximum size of blocks of uncompressed data before compressing for writing to a table. +You can also specify this setting in the global settings (see [max_compress_block_size](/docs/en/operations/settings/settings.md/#max-compress-block-size) setting). +The value specified when table is created overrides the global value for this setting. + +## min_compress_block_size + +Minimum size of blocks of uncompressed data required for compression when writing the next mark. +You can also specify this setting in the global settings (see [min_compress_block_size](/docs/en/operations/settings/settings.md/#min-compress-block-size) setting). 
+The value specified when table is created overrides the global value for this setting. + +## max_partitions_to_read + +Limits the maximum number of partitions that can be accessed in one query. +You can also specify setting [max_partitions_to_read](/docs/en/operations/settings/merge-tree-settings.md/#max-partitions-to-read) in the global setting. ## max_suspicious_broken_parts @@ -17,37 +134,6 @@ Possible values: Default value: 100. -Override example in `config.xml`: - -``` text - - 5 - -``` - -An example to set in `SETTINGS` for a particular table: - -``` sql -CREATE TABLE foo -( - `A` Int64 -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS max_suspicious_broken_parts = 500; -``` - -An example of changing the settings for a specific table with the `ALTER TABLE ... MODIFY SETTING` command: - -``` sql -ALTER TABLE foo - MODIFY SETTING max_suspicious_broken_parts = 100; - --- reset to default (use value from system.merge_tree_settings) -ALTER TABLE foo - RESET SETTING max_suspicious_broken_parts; -``` - ## parts_to_throw_insert {#parts-to-throw-insert} If the number of active parts in a single partition exceeds the `parts_to_throw_insert` value, `INSERT` is interrupted with the `Too many parts (N). Merges are processing significantly slower than inserts` exception. @@ -301,6 +387,8 @@ Default value: 10800 ## try_fetch_recompressed_part_timeout +Timeout (in seconds) before starting merge with recompression. During this time ClickHouse tries to fetch recompressed part from replica which assigned this merge with recompression. + Recompression works slow in most cases, so we don't start merge with recompression until this timeout and trying to fetch recompressed part from replica which assigned this merge with recompression. 
Possible values: @@ -886,10 +974,30 @@ Default value: false - [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting -### allow_experimental_optimized_row_order +## merge_workload + +Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for background merges of this table. If not specified (empty string), then server setting `merge_workload` is used instead. + +Default value: an empty string + +**See Also** +- [Workload Scheduling](/docs/en/operations/workload-scheduling.md) + +## mutation_workload + +Used to regulate how resources are utilized and shared between mutations and other workloads. Specified value is used as `workload` setting value for background mutations of this table. If not specified (empty string), then server setting `mutation_workload` is used instead. + +Default value: an empty string + +**See Also** +- [Workload Scheduling](/docs/en/operations/workload-scheduling.md) + +### optimize_row_order Controls if the row order should be optimized during inserts to improve the compressability of the newly inserted table part. +Only has an effect for ordinary MergeTree-engine tables. Does nothing for specialized MergeTree engine tables (e.g. CollapsingMergeTree). + MergeTree tables are (optionally) compressed using [compression codecs](../../sql-reference/statements/create/table.md#column_compression_codec). Generic compression codecs such as LZ4 and ZSTD achieve maximum compression rates if the data exposes patterns. Long runs of the same value typically compress very well. diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 1a27b350652..530023df5b7 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1417,6 +1417,24 @@ Compression method used in output Parquet format. 
Supported codecs: `snappy`, `l Default value: `lz4`. +### input_format_parquet_max_block_size {#input_format_parquet_max_block_size} +Max block row size for parquet reader. By controlling the number of rows in each block, you can control the memory usage, +and in some operators that cache blocks, you can improve the accuracy of the operator's memory control. + +Default value: `65409`. + +### input_format_parquet_prefer_block_bytes {#input_format_parquet_prefer_block_bytes} +Average block bytes output by parquet reader. Lowering the configuration in the case of reading some high compression parquet relieves the memory pressure. + +Default value: `65409 * 256 = 16744704` + +### output_format_parquet_write_page_index {#output_format_parquet_write_page_index} + +Writes a page index into Parquet files. To enable this, set `output_format_parquet_use_custom_encoder`=`false` and +`output_format_parquet_write_page_index`=`true`. + +Enabled by default. + ## Hive format settings {#hive-format-settings} ### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter} @@ -1695,6 +1713,43 @@ Result: └────────────┘ ``` +## output_format_pretty_display_footer_column_names + +Display column names in the footer if there are many table rows. + +Possible values: + +- 0 — No column names are displayed in the footer. +- 1 — Column names are displayed in the footer if row count is greater than or equal to the threshold value set by [output_format_pretty_display_footer_column_names_min_rows](#output_format_pretty_display_footer_column_names_min_rows) (50 by default). + +Default value: `1`. + +**Example** + +Query: + +```sql +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 1000); +``` + +Result: + +```response + ┌─number─┬─toTypeName(number)─┐ + 1. │ 0 │ UInt64 │ + 2. │ 1 │ UInt64 │ + 3. │ 2 │ UInt64 │ + ... + 999. │ 998 │ UInt64 │ +1000.
│ 999 │ UInt64 │ + └─number─┴─toTypeName(number)─┘ +``` +## output_format_pretty_display_footer_column_names_min_rows + +Sets the minimum number of rows for which a footer with column names will be displayed if setting [output_format_pretty_display_footer_column_names](#output_format_pretty_display_footer_column_names) is enabled. + +Default value: `50`. + ## Template format settings {#template-format-settings} ### format_template_resultset {#format_template_resultset} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 0b905df21d4..3d6d776f4da 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1590,6 +1590,22 @@ Possible values: Default value: `default`. +## parallel_replicas_custom_key_range_lower {#parallel_replicas_custom_key_range_lower} + +Allows the filter type `range` to split the work evenly between replicas based on the custom range `[parallel_replicas_custom_key_range_lower, INT_MAX]`. + +When used in conjunction with [parallel_replicas_custom_key_range_upper](#parallel_replicas_custom_key_range_upper), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`. + +Note: This setting will not cause any additional data to be filtered during query processing, rather it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing. + +## parallel_replicas_custom_key_range_upper {#parallel_replicas_custom_key_range_upper} + +Allows the filter type `range` to split the work evenly between replicas based on the custom range `[0, parallel_replicas_custom_key_range_upper]`. A value of 0 disables the upper bound, setting it to the max value of the custom key expression.
+ +When used in conjunction with [parallel_replicas_custom_key_range_lower](#parallel_replicas_custom_key_range_lower), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`. + +Note: This setting will not cause any additional data to be filtered during query processing, rather it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing. + ## allow_experimental_parallel_reading_from_replicas Enables or disables sending SELECT queries to all replicas of a table (up to `max_parallel_replicas`). Reading is parallelized and coordinated dynamically. It will work for any kind of MergeTree table. @@ -3170,6 +3186,18 @@ Possible values: Default value: `0`. +## lightweight_deletes_sync {#lightweight_deletes_sync} + +The same as `mutations_sync`, but controls only execution of lightweight deletes. + +Possible values: + +- 0 - Lightweight deletes execute asynchronously. +- 1 - The query waits for the lightweight deletes to complete on the current server. +- 2 - The query waits for the lightweight deletes to complete on all replicas (if they exist). + +Default value: `2`. + **See Also** - [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) @@ -3850,6 +3878,10 @@ Possible values: Default value: 30. +:::note +It's applicable only to the default profile. A server reboot is required for the changes to take effect. +::: + ## http_receive_timeout {#http_receive_timeout} HTTP receive timeout (in seconds).
@@ -5108,7 +5140,7 @@ a Tuple( ) ``` -## allow_experimental_statistic {#allow_experimental_statistic} +## allow_experimental_statistics {#allow_experimental_statistics} Allows defining columns with [statistics](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) and [manipulate statistics](../../engines/table-engines/mergetree-family/mergetree.md#column-statistics). @@ -5118,7 +5150,7 @@ Allows using statistic to optimize the order of [prewhere conditions](../../sql- ## analyze_index_with_space_filling_curves -If a table has a space-filling curve in its index, e.g. `ORDER BY mortonEncode(x, y)`, and the query has conditions on its arguments, e.g. `x >= 10 AND x <= 20 AND y >= 20 AND y <= 30`, use the space-filling curve for index analysis. +If a table has a space-filling curve in its index, e.g. `ORDER BY mortonEncode(x, y)` or `ORDER BY hilbertEncode(x, y)`, and the query has conditions on its arguments, e.g. `x >= 10 AND x <= 20 AND y >= 20 AND y <= 30`, use the space-filling curve for index analysis. ## query_plan_enable_optimizations {#query_plan_enable_optimizations} @@ -5386,6 +5418,15 @@ When set to `false` than all attempts are made with identical timeouts. Default value: `true`. +## allow_deprecated_snowflake_conversion_functions {#allow_deprecated_snowflake_conversion_functions} + +Functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake` are deprecated and disabled by default. +Please use functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` instead. + +To re-enable the deprecated functions (e.g., during a transition period), please set this setting to `true`. + +Default value: `false` + ## allow_experimental_variant_type {#allow_experimental_variant_type} Allows creation of experimental [Variant](../../sql-reference/data-types/variant.md). 
diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index 81725b97e41..762d187917c 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -639,6 +639,10 @@ An internal metric of the low-level memory allocator (jemalloc). See https://jem An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html +### jemalloc.prof.active + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + **See Also** - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/en/operations/system-tables/error_log.md b/docs/en/operations/system-tables/error_log.md new file mode 100644 index 00000000000..15edef58662 --- /dev/null +++ b/docs/en/operations/system-tables/error_log.md @@ -0,0 +1,39 @@ +--- +slug: /en/operations/system-tables/error_log +--- +# error_log + +Contains history of error values from table `system.errors`, periodically flushed to disk. + +Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. +- `code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code number of the error. +- `error` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) - Name of the error. +- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of times this error happened. +- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Remote exception (i.e. received during one of the distributed queries). 
+ +**Example** + +``` sql +SELECT * FROM system.error_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +hostname: clickhouse.eu-central1.internal +event_date: 2024-06-18 +event_time: 2024-06-18 07:32:39 +code: 999 +error: KEEPER_EXCEPTION +value: 2 +remote: 0 +``` + +**See also** + +- [error_log setting](../../operations/server-configuration-parameters/settings.md#error_log) — Enabling and disabling the setting. +- [system.errors](../../operations/system-tables/errors.md) — Contains error codes with the number of times they have been triggered. +- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 75b855966a3..47094eec3f0 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -113,6 +113,8 @@ Columns: - `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution. - `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. - `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution. +- `used_privileges` ([Array(String)](../../sql-reference/data-types/array.md)) - Privileges which were successfully checked during query execution. +- `missing_privileges` ([Array(String)](../../sql-reference/data-types/array.md)) - Privileges that are missing during query execution. - `query_cache_usage` ([Enum8](../../sql-reference/data-types/enum.md)) — Usage of the [query cache](../query-cache.md) during query execution. Values: - `'Unknown'` = Status unknown. - `'None'` = The query result was neither written into nor read from the query cache. 
@@ -194,6 +196,8 @@ used_formats: [] used_functions: [] used_storages: [] used_table_functions: [] +used_privileges: [] +missing_privileges: [] query_cache_usage: None ``` diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index ed22679a3e6..df041f5885e 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -36,9 +36,24 @@ $ echo 0 | sudo tee /proc/sys/vm/overcommit_memory Use `perf top` to watch the time spent in the kernel for memory management. Permanent huge pages also do not need to be allocated. -:::warning -If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. The recommended amount of RAM is 32 GB or more. You can use ClickHouse in a system with a small amount of RAM, even with 2 GB of RAM, but it requires additional tuning and can ingest at a low rate. -::: +### Using less than 16GB of RAM + +The recommended amount of RAM is 32 GB or more. + +If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. You can use ClickHouse in a system with a small amount of RAM (as low as 2 GB), but these setups require additional tuning and can only ingest at a low rate. + +When using ClickHouse with less than 16GB of RAM, we recommend the following: + +- Lower the size of the mark cache in the `config.xml`. It can be set as low as 500 MB, but it cannot be set to zero. +- Lower the number of query processing threads down to `1`. +- Lower the `max_block_size` to `8192`. Values as low as `1024` can still be practical. +- Lower `max_download_threads` to `1`. +- Set `input_format_parallel_parsing` and `output_format_parallel_formatting` to `0`. + +Additional notes: +- To flush the memory cached by the memory allocator, you can run the `SYSTEM JEMALLOC PURGE` +command. 
+- We do not recommend using S3 or Kafka integrations on low-memory machines because they require significant memory for buffers. ## Storage Subsystem {#storage-subsystem} diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index 93a3fecf3c6..f19643a3fa5 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -236,10 +236,10 @@ Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec. Previous example is the same as: ``` bash -$ echo -e "1,2\n3,4" | clickhouse-local --query " +$ echo -e "1,2\n3,4" | clickhouse-local -n --query " CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin); SELECT a, b FROM table; - DROP TABLE table" + DROP TABLE table;" Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec. 1 2 3 4 diff --git a/docs/en/operations/utilities/odbc-bridge.md b/docs/en/operations/utilities/odbc-bridge.md index abb8860880e..eb849c6b6ae 100644 --- a/docs/en/operations/utilities/odbc-bridge.md +++ b/docs/en/operations/utilities/odbc-bridge.md @@ -18,7 +18,7 @@ This tool works via HTTP, not via pipes, shared memory, or TCP because: However it can be used as standalone tool from command line with the following parameters in POST-request URL: - `connection_string` -- ODBC connection string. -- `columns` -- columns in ClickHouse NamesAndTypesList format, name in backticks, +- `sample_block` -- columns description in ClickHouse NamesAndTypesList format, name in backticks, type as string. Name and type are space separated, rows separated with newline. - `max_block_size` -- optional parameter, sets maximum size of single block. 
diff --git a/docs/en/operations/workload-scheduling.md b/docs/en/operations/workload-scheduling.md index 24149099892..08629492ec6 100644 --- a/docs/en/operations/workload-scheduling.md +++ b/docs/en/operations/workload-scheduling.md @@ -47,6 +47,8 @@ Example: Queries can be marked with setting `workload` to distinguish different workloads. If `workload` is not set, than value "default" is used. Note that you are able to specify the other value using settings profiles. Setting constraints can be used to make `workload` constant if you want all queries from the user to be marked with fixed value of `workload` setting. +It is possible to assign a `workload` setting for background activities. Merges and mutations use the `merge_workload` and `mutation_workload` server settings respectively. These values can also be overridden for specific tables using the `merge_workload` and `mutation_workload` merge tree settings. + Let's consider an example of a system with two different workloads: "production" and "development".
```sql @@ -151,6 +153,9 @@ Example: ``` - ## See also - [system.scheduler](/docs/en/operations/system-tables/scheduler.md) + - [merge_workload](/docs/en/operations/settings/merge-tree-settings.md#merge_workload) merge tree setting + - [merge_workload](/docs/en/operations/server-configuration-parameters/settings.md#merge_workload) global server setting + - [mutation_workload](/docs/en/operations/settings/merge-tree-settings.md#mutation_workload) merge tree setting + - [mutation_workload](/docs/en/operations/server-configuration-parameters/settings.md#mutation_workload) global server setting diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md index 8ccc5e292b5..e30aa66b3b3 100644 --- a/docs/en/sql-reference/aggregate-functions/combinators.md +++ b/docs/en/sql-reference/aggregate-functions/combinators.md @@ -106,8 +106,8 @@ To work with these states, use: - [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engine. - [finalizeAggregation](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) function. - [runningAccumulate](../../sql-reference/functions/other-functions.md#runningaccumulate) function. -- [-Merge](#aggregate_functions_combinators-merge) combinator. -- [-MergeState](#aggregate_functions_combinators-mergestate) combinator. +- [-Merge](#-merge) combinator. +- [-MergeState](#-mergestate) combinator. ## -Merge diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 1dc89b8dcf9..093d88f939f 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -82,10 +82,12 @@ FROM In this case, you should remember that you do not know the histogram bin borders. -## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) 
+## sequenceMatch Checks whether the sequence contains an event chain that matches the pattern. +**Syntax** + ``` sql sequenceMatch(pattern)(timestamp, cond1, cond2, ...) ``` @@ -102,7 +104,7 @@ Events that occur at the same second may lay in the sequence in an undefined ord **Parameters** -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +- `pattern` — Pattern string. See [Pattern syntax](#sequencematch). **Returned values** @@ -170,9 +172,9 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM **See Also** -- [sequenceCount](#function-sequencecount) +- [sequenceCount](#sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, ...) +## sequenceCount Counts the number of event chains that matched the pattern. The function searches event chains that do not overlap. It starts to search for the next chain after the current chain is matched. @@ -180,6 +182,8 @@ Counts the number of event chains that matched the pattern. The function searche Events that occur at the same second may lay in the sequence in an undefined order affecting the result. ::: +**Syntax** + ``` sql sequenceCount(pattern)(timestamp, cond1, cond2, ...) ``` @@ -192,7 +196,7 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...) **Parameters** -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +- `pattern` — Pattern string. See [Pattern syntax](#sequencematch). 
**Returned values** @@ -229,7 +233,7 @@ SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t **See Also** -- [sequenceMatch](#function-sequencematch) +- [sequenceMatch](#sequencematch) ## windowFunnel diff --git a/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md index d9b44b3ff07..56e54d3faf9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md +++ b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/analysis_of_variance -sidebar_position: 6 +sidebar_position: 101 --- # analysisOfVariance diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md index f1b5a6683e5..cdff7dde4a9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/any.md +++ b/docs/en/sql-reference/aggregate-functions/reference/any.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/any -sidebar_position: 6 +sidebar_position: 102 --- # any diff --git a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md index 9fbc21910f8..9c6e6b5fead 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/anyheavy -sidebar_position: 103 +sidebar_position: 104 --- # anyHeavy diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast.md b/docs/en/sql-reference/aggregate-functions/reference/anylast.md index 8fcee2cf8e6..e43bc07fbdc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast.md @@ -1,6 +1,6 @@ --- slug: 
/en/sql-reference/aggregate-functions/reference/anylast -sidebar_position: 104 +sidebar_position: 105 --- # anyLast diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md index b6d0806f35d..8f093cfdb61 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/anylast_respect_nulls -sidebar_position: 104 +sidebar_position: 106 --- # anyLast_respect_nulls diff --git a/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md b/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md index 2bb43a9f665..ea2083ebd04 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md +++ b/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/approxtopk -sidebar_position: 212 +sidebar_position: 107 --- # approx_top_k diff --git a/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md b/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md index aa884b26d8e..639142331f0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/approxtopsum -sidebar_position: 212 +sidebar_position: 108 --- # approx_top_sum diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 2274dd4a5dc..8c6b2b532e8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -1,6 +1,6 @@ --- slug: 
/en/sql-reference/aggregate-functions/reference/argmax -sidebar_position: 106 +sidebar_position: 109 --- # argMax diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md index 297744fb1db..0ab21fe2b52 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/argmin -sidebar_position: 105 +sidebar_position: 110 --- # argMin diff --git a/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md b/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md index 3c71129bdb5..c0ac0db33f3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/array_concat_agg -sidebar_position: 110 +sidebar_position: 111 --- # array_concat_agg diff --git a/docs/en/sql-reference/aggregate-functions/reference/avg.md b/docs/en/sql-reference/aggregate-functions/reference/avg.md index 5463d8a1874..7789c30bfe0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/avg -sidebar_position: 5 +sidebar_position: 112 --- # avg diff --git a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md index 99d3bac763d..304d0407d98 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/avgweighted -sidebar_position: 107 +sidebar_position: 113 --- # avgWeighted diff --git 
a/docs/en/sql-reference/aggregate-functions/reference/boundrat.md b/docs/en/sql-reference/aggregate-functions/reference/boundrat.md index f3907af8030..d253a250600 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/boundrat.md +++ b/docs/en/sql-reference/aggregate-functions/reference/boundrat.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/boundingRatio -sidebar_position: 2 +sidebar_position: 114 title: boundingRatio --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md b/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md index 57edb47950a..7983c3f2e60 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md +++ b/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/categoricalinformationvalue -sidebar_position: 250 +sidebar_position: 115 title: categoricalInformationValue --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/contingency.md b/docs/en/sql-reference/aggregate-functions/reference/contingency.md index 902c1f4af80..a49ff22febc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/contingency.md +++ b/docs/en/sql-reference/aggregate-functions/reference/contingency.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/contingency -sidebar_position: 350 +sidebar_position: 116 --- # contingency diff --git a/docs/en/sql-reference/aggregate-functions/reference/corr.md b/docs/en/sql-reference/aggregate-functions/reference/corr.md index 5681c942169..c43b4d3b25a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/corr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/corr.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/corr -sidebar_position: 107 +sidebar_position: 117 --- # corr diff --git 
a/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md b/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md index 718477b28dd..96978863646 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md +++ b/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/corrmatrix -sidebar_position: 108 +sidebar_position: 118 --- # corrMatrix diff --git a/docs/en/sql-reference/aggregate-functions/reference/corrstable.md b/docs/en/sql-reference/aggregate-functions/reference/corrstable.md index b35442a32b6..979cf244245 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/corrstable.md +++ b/docs/en/sql-reference/aggregate-functions/reference/corrstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/corrstable -sidebar_position: 107 +sidebar_position: 119 --- # corrStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index ca4067c8d8c..e6f2cdd6aa9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/count -sidebar_position: 1 +sidebar_position: 120 --- # count diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarpop.md b/docs/en/sql-reference/aggregate-functions/reference/covarpop.md index 78b9f4cffea..7231f92b8fa 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarpop.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarpop -sidebar_position: 37 +sidebar_position: 121 --- # covarPop diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md 
b/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md index d7400599a49..c8811b3811e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarpopmatrix -sidebar_position: 36 +sidebar_position: 122 --- # covarPopMatrix diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md b/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md index 68e78fc3bd8..48e5368faac 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarpopstable -sidebar_position: 36 +sidebar_position: 123 --- # covarPopStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md b/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md index 7d5d5d13f35..92fe213b407 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarsamp -sidebar_position: 37 +sidebar_position: 124 --- # covarSamp diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md b/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md index b71d753f0be..1585c4a9970 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarsampmatrix -sidebar_position: 38 +sidebar_position: 125 --- # covarSampMatrix diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md 
b/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md index 3e6867b96d6..6764877768e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarsampstable -sidebar_position: 37 +sidebar_position: 126 --- # covarSampStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md index 2424ff95237..db0e1c5eb4c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/cramersv -sidebar_position: 351 +sidebar_position: 127 --- # cramersV diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md index 939c04e3fdc..2ff7ce489d3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/cramersvbiascorrected -sidebar_position: 352 +sidebar_position: 128 --- # cramersVBiasCorrected diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md index 37d9d08cbdb..650135ecfeb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/deltasum -sidebar_position: 141 +sidebar_position: 129 --- # deltaSum diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md 
b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md index c51d86389b0..ec5cfa5fecc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/deltasumtimestamp -sidebar_position: 141 +sidebar_position: 130 title: deltaSumTimestamp --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/entropy.md b/docs/en/sql-reference/aggregate-functions/reference/entropy.md index fc8d627ecab..7970cdd268b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/entropy.md +++ b/docs/en/sql-reference/aggregate-functions/reference/entropy.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/entropy -sidebar_position: 302 +sidebar_position: 131 --- # entropy diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md index 75041ace7a3..3086a48f819 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md @@ -1,7 +1,7 @@ --- -slug: /en/sql-reference/aggregate-functions/reference/exponentialmovingaverage -sidebar_position: 108 -sidebar_title: exponentialMovingAverage +slug: /en/sql-reference/aggregate-functions/reference/exponentialMovingAverage +sidebar_position: 132 +title: exponentialMovingAverage --- ## exponentialMovingAverage @@ -96,56 +96,56 @@ Result: ``` text ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar────────────────────────────────────────┐ -│ 1 │ 0 │ 0.067 │ ███▎ │ +│ 1 │ 0 │ 0.067 │ ███▎ │ │ 0 │ 1 │ 0.062 │ ███ │ -│ 0 │ 2 │ 0.058 │ ██▊ │ -│ 0 │ 3 │ 0.054 │ ██▋ │ +│ 0 │ 2 │ 0.058 │ ██▊ │ +│ 0 │ 3 │ 0.054 │ ██▋ │ │ 0 │ 4 │ 0.051 │ ██▌ │ -│ 0 │ 5 │ 0.047 │ ██▎ │ -│ 0 │ 6 │ 0.044 │ ██▏ │ +│ 0 │ 5 │ 
0.047 │ ██▎ │ +│ 0 │ 6 │ 0.044 │ ██▏ │ │ 0 │ 7 │ 0.041 │ ██ │ -│ 0 │ 8 │ 0.038 │ █▊ │ -│ 0 │ 9 │ 0.036 │ █▋ │ -│ 0 │ 10 │ 0.033 │ █▋ │ +│ 0 │ 8 │ 0.038 │ █▊ │ +│ 0 │ 9 │ 0.036 │ █▋ │ +│ 0 │ 10 │ 0.033 │ █▋ │ │ 0 │ 11 │ 0.031 │ █▌ │ -│ 0 │ 12 │ 0.029 │ █▍ │ -│ 0 │ 13 │ 0.027 │ █▎ │ -│ 0 │ 14 │ 0.025 │ █▎ │ -│ 0 │ 15 │ 0.024 │ █▏ │ +│ 0 │ 12 │ 0.029 │ █▍ │ +│ 0 │ 13 │ 0.027 │ █▎ │ +│ 0 │ 14 │ 0.025 │ █▎ │ +│ 0 │ 15 │ 0.024 │ █▏ │ │ 0 │ 16 │ 0.022 │ █ │ │ 0 │ 17 │ 0.021 │ █ │ -│ 0 │ 18 │ 0.019 │ ▊ │ -│ 0 │ 19 │ 0.018 │ ▊ │ -│ 0 │ 20 │ 0.017 │ ▋ │ -│ 0 │ 21 │ 0.016 │ ▋ │ -│ 0 │ 22 │ 0.015 │ ▋ │ -│ 0 │ 23 │ 0.014 │ ▋ │ -│ 0 │ 24 │ 0.013 │ ▋ │ -│ 1 │ 25 │ 0.079 │ ███▊ │ +│ 0 │ 18 │ 0.019 │ ▊ │ +│ 0 │ 19 │ 0.018 │ ▊ │ +│ 0 │ 20 │ 0.017 │ ▋ │ +│ 0 │ 21 │ 0.016 │ ▋ │ +│ 0 │ 22 │ 0.015 │ ▋ │ +│ 0 │ 23 │ 0.014 │ ▋ │ +│ 0 │ 24 │ 0.013 │ ▋ │ +│ 1 │ 25 │ 0.079 │ ███▊ │ │ 1 │ 26 │ 0.14 │ ███████ │ -│ 1 │ 27 │ 0.198 │ █████████▊ │ +│ 1 │ 27 │ 0.198 │ █████████▊ │ │ 1 │ 28 │ 0.252 │ ████████████▌ │ │ 1 │ 29 │ 0.302 │ ███████████████ │ -│ 1 │ 30 │ 0.349 │ █████████████████▍ │ +│ 1 │ 30 │ 0.349 │ █████████████████▍ │ │ 1 │ 31 │ 0.392 │ ███████████████████▌ │ -│ 1 │ 32 │ 0.433 │ █████████████████████▋ │ +│ 1 │ 32 │ 0.433 │ █████████████████████▋ │ │ 1 │ 33 │ 0.471 │ ███████████████████████▌ │ -│ 1 │ 34 │ 0.506 │ █████████████████████████▎ │ -│ 1 │ 35 │ 0.539 │ ██████████████████████████▊ │ +│ 1 │ 34 │ 0.506 │ █████████████████████████▎ │ +│ 1 │ 35 │ 0.539 │ ██████████████████████████▊ │ │ 1 │ 36 │ 0.57 │ ████████████████████████████▌ │ -│ 1 │ 37 │ 0.599 │ █████████████████████████████▊ │ -│ 1 │ 38 │ 0.626 │ ███████████████████████████████▎ │ +│ 1 │ 37 │ 0.599 │ █████████████████████████████▊ │ +│ 1 │ 38 │ 0.626 │ ███████████████████████████████▎ │ │ 1 │ 39 │ 0.651 │ ████████████████████████████████▌ │ -│ 1 │ 40 │ 0.674 │ █████████████████████████████████▋ │ -│ 1 │ 41 │ 0.696 │ ██████████████████████████████████▋ │ -│ 1 │ 42 │ 0.716 │ ███████████████████████████████████▋ │ -│ 1 │ 43 │ 
0.735 │ ████████████████████████████████████▋ │ -│ 1 │ 44 │ 0.753 │ █████████████████████████████████████▋ │ -│ 1 │ 45 │ 0.77 │ ██████████████████████████████████████▍ │ -│ 1 │ 46 │ 0.785 │ ███████████████████████████████████████▎ │ -│ 1 │ 47 │ 0.8 │ ███████████████████████████████████████▊ │ -│ 1 │ 48 │ 0.813 │ ████████████████████████████████████████▋ │ -│ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎│ +│ 1 │ 40 │ 0.674 │ █████████████████████████████████▋ │ +│ 1 │ 41 │ 0.696 │ ██████████████████████████████████▋ │ +│ 1 │ 42 │ 0.716 │ ███████████████████████████████████▋ │ +│ 1 │ 43 │ 0.735 │ ████████████████████████████████████▋ │ +│ 1 │ 44 │ 0.753 │ █████████████████████████████████████▋ │ +│ 1 │ 45 │ 0.77 │ ██████████████████████████████████████▍ │ +│ 1 │ 46 │ 0.785 │ ███████████████████████████████████████▎ │ +│ 1 │ 47 │ 0.8 │ ███████████████████████████████████████▊ │ +│ 1 │ 48 │ 0.813 │ ████████████████████████████████████████▋ │ +│ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎ │ └───────┴──────┴──────────────────────┴────────────────────────────────────────────┘ ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedavg.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedavg.md new file mode 100644 index 00000000000..c729552749a --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedavg.md @@ -0,0 +1,105 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/exponentialTimeDecayedAvg +sidebar_position: 133 +title: exponentialTimeDecayedAvg +--- + +## exponentialTimeDecayedAvg + +Returns the exponentially smoothed weighted moving average of values of a time series at point `t` in time. + +**Syntax** + +```sql +exponentialTimeDecayedAvg(x)(v, t) +``` + +**Arguments** + +- `v` — Value. 
[Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `t` — Time. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md), [DateTime](../../data-types/datetime.md), [DateTime64](../../data-types/datetime64.md). + +**Parameters** + +- `x` — Half-life period. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). + +**Returned values** + +- Returns an exponentially smoothed weighted moving average at index `t` in time. [Float64](../../data-types/float.md). + +**Examples** + +Query: + +```sql +SELECT + value, + time, + round(exp_smooth, 3), + bar(exp_smooth, 0, 5, 50) AS bar +FROM + ( + SELECT + (number = 0) OR (number >= 25) AS value, + number AS time, + exponentialTimeDecayedAvg(10)(value, time) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS exp_smooth + FROM numbers(50) + ); +``` + +Result: + +```response + ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar────────┐ + 1. │ 1 │ 0 │ 1 │ ██████████ │ + 2. │ 0 │ 1 │ 0.475 │ ████▊ │ + 3. │ 0 │ 2 │ 0.301 │ ███ │ + 4. │ 0 │ 3 │ 0.214 │ ██▏ │ + 5. │ 0 │ 4 │ 0.162 │ █▌ │ + 6. │ 0 │ 5 │ 0.128 │ █▎ │ + 7. │ 0 │ 6 │ 0.104 │ █ │ + 8. │ 0 │ 7 │ 0.086 │ ▊ │ + 9. │ 0 │ 8 │ 0.072 │ ▋ │ +10. │ 0 │ 9 │ 0.061 │ ▌ │ +11. │ 0 │ 10 │ 0.052 │ ▌ │ +12. │ 0 │ 11 │ 0.045 │ ▍ │ +13. │ 0 │ 12 │ 0.039 │ ▍ │ +14. │ 0 │ 13 │ 0.034 │ ▎ │ +15. │ 0 │ 14 │ 0.03 │ ▎ │ +16. │ 0 │ 15 │ 0.027 │ ▎ │ +17. │ 0 │ 16 │ 0.024 │ ▏ │ +18. │ 0 │ 17 │ 0.021 │ ▏ │ +19. │ 0 │ 18 │ 0.018 │ ▏ │ +20. │ 0 │ 19 │ 0.016 │ ▏ │ +21. │ 0 │ 20 │ 0.015 │ ▏ │ +22. │ 0 │ 21 │ 0.013 │ ▏ │ +23. │ 0 │ 22 │ 0.012 │ │ +24. │ 0 │ 23 │ 0.01 │ │ +25. │ 0 │ 24 │ 0.009 │ │ +26. │ 1 │ 25 │ 0.111 │ █ │ +27. │ 1 │ 26 │ 0.202 │ ██ │ +28. │ 1 │ 27 │ 0.283 │ ██▊ │ +29. 
│ 1 │ 28 │ 0.355 │ ███▌ │ +30. │ 1 │ 29 │ 0.42 │ ████▏ │ +31. │ 1 │ 30 │ 0.477 │ ████▊ │ +32. │ 1 │ 31 │ 0.529 │ █████▎ │ +33. │ 1 │ 32 │ 0.576 │ █████▊ │ +34. │ 1 │ 33 │ 0.618 │ ██████▏ │ +35. │ 1 │ 34 │ 0.655 │ ██████▌ │ +36. │ 1 │ 35 │ 0.689 │ ██████▉ │ +37. │ 1 │ 36 │ 0.719 │ ███████▏ │ +38. │ 1 │ 37 │ 0.747 │ ███████▍ │ +39. │ 1 │ 38 │ 0.771 │ ███████▋ │ +40. │ 1 │ 39 │ 0.793 │ ███████▉ │ +41. │ 1 │ 40 │ 0.813 │ ████████▏ │ +42. │ 1 │ 41 │ 0.831 │ ████████▎ │ +43. │ 1 │ 42 │ 0.848 │ ████████▍ │ +44. │ 1 │ 43 │ 0.862 │ ████████▌ │ +45. │ 1 │ 44 │ 0.876 │ ████████▊ │ +46. │ 1 │ 45 │ 0.888 │ ████████▉ │ +47. │ 1 │ 46 │ 0.898 │ ████████▉ │ +48. │ 1 │ 47 │ 0.908 │ █████████ │ +49. │ 1 │ 48 │ 0.917 │ █████████▏ │ +50. │ 1 │ 49 │ 0.925 │ █████████▏ │ + └───────┴──────┴──────────────────────┴────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedcount.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedcount.md new file mode 100644 index 00000000000..b73d6c2503d --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedcount.md @@ -0,0 +1,104 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/exponentialTimeDecayedCount +sidebar_position: 134 +title: exponentialTimeDecayedCount +--- + +## exponentialTimeDecayedCount + +Returns the cumulative exponential decay over a time series at the index `t` in time. + +**Syntax** + +```sql +exponentialTimeDecayedCount(x)(t) +``` + +**Arguments** + +- `t` — Time. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md), [DateTime](../../data-types/datetime.md), [DateTime64](../../data-types/datetime64.md). + +**Parameters** + +- `x` — Half-life period. 
[Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). + +**Returned values** + +- Returns the cumulative exponential decay at the given point in time. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT + value, + time, + round(exp_smooth, 3), + bar(exp_smooth, 0, 20, 50) AS bar +FROM +( + SELECT + (number % 5) = 0 AS value, + number AS time, + exponentialTimeDecayedCount(10)(time) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS exp_smooth + FROM numbers(50) +); +``` + +Result: + +```response + ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar────────────────────────┐ + 1. │ 1 │ 0 │ 1 │ ██▌ │ + 2. │ 0 │ 1 │ 1.905 │ ████▊ │ + 3. │ 0 │ 2 │ 2.724 │ ██████▊ │ + 4. │ 0 │ 3 │ 3.464 │ ████████▋ │ + 5. │ 0 │ 4 │ 4.135 │ ██████████▎ │ + 6. │ 1 │ 5 │ 4.741 │ ███████████▊ │ + 7. │ 0 │ 6 │ 5.29 │ █████████████▏ │ + 8. │ 0 │ 7 │ 5.787 │ ██████████████▍ │ + 9. │ 0 │ 8 │ 6.236 │ ███████████████▌ │ +10. │ 0 │ 9 │ 6.643 │ ████████████████▌ │ +11. │ 1 │ 10 │ 7.01 │ █████████████████▌ │ +12. │ 0 │ 11 │ 7.343 │ ██████████████████▎ │ +13. │ 0 │ 12 │ 7.644 │ ███████████████████ │ +14. │ 0 │ 13 │ 7.917 │ ███████████████████▊ │ +15. │ 0 │ 14 │ 8.164 │ ████████████████████▍ │ +16. │ 1 │ 15 │ 8.387 │ ████████████████████▉ │ +17. │ 0 │ 16 │ 8.589 │ █████████████████████▍ │ +18. │ 0 │ 17 │ 8.771 │ █████████████████████▉ │ +19. │ 0 │ 18 │ 8.937 │ ██████████████████████▎ │ +20. │ 0 │ 19 │ 9.086 │ ██████████████████████▋ │ +21. │ 1 │ 20 │ 9.222 │ ███████████████████████ │ +22. │ 0 │ 21 │ 9.344 │ ███████████████████████▎ │ +23. │ 0 │ 22 │ 9.455 │ ███████████████████████▋ │ +24. │ 0 │ 23 │ 9.555 │ ███████████████████████▉ │ +25. │ 0 │ 24 │ 9.646 │ ████████████████████████ │ +26. │ 1 │ 25 │ 9.728 │ ████████████████████████▎ │ +27. │ 0 │ 26 │ 9.802 │ ████████████████████████▌ │ +28. │ 0 │ 27 │ 9.869 │ ████████████████████████▋ │ +29. 
│ 0 │ 28 │ 9.93 │ ████████████████████████▊ │ +30. │ 0 │ 29 │ 9.985 │ ████████████████████████▉ │ +31. │ 1 │ 30 │ 10.035 │ █████████████████████████ │ +32. │ 0 │ 31 │ 10.08 │ █████████████████████████▏ │ +33. │ 0 │ 32 │ 10.121 │ █████████████████████████▎ │ +34. │ 0 │ 33 │ 10.158 │ █████████████████████████▍ │ +35. │ 0 │ 34 │ 10.191 │ █████████████████████████▍ │ +36. │ 1 │ 35 │ 10.221 │ █████████████████████████▌ │ +37. │ 0 │ 36 │ 10.249 │ █████████████████████████▌ │ +38. │ 0 │ 37 │ 10.273 │ █████████████████████████▋ │ +39. │ 0 │ 38 │ 10.296 │ █████████████████████████▋ │ +40. │ 0 │ 39 │ 10.316 │ █████████████████████████▊ │ +41. │ 1 │ 40 │ 10.334 │ █████████████████████████▊ │ +42. │ 0 │ 41 │ 10.351 │ █████████████████████████▉ │ +43. │ 0 │ 42 │ 10.366 │ █████████████████████████▉ │ +44. │ 0 │ 43 │ 10.379 │ █████████████████████████▉ │ +45. │ 0 │ 44 │ 10.392 │ █████████████████████████▉ │ +46. │ 1 │ 45 │ 10.403 │ ██████████████████████████ │ +47. │ 0 │ 46 │ 10.413 │ ██████████████████████████ │ +48. │ 0 │ 47 │ 10.422 │ ██████████████████████████ │ +49. │ 0 │ 48 │ 10.43 │ ██████████████████████████ │ +50. │ 0 │ 49 │ 10.438 │ ██████████████████████████ │ + └───────┴──────┴──────────────────────┴────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedmax.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedmax.md new file mode 100644 index 00000000000..06dc5313904 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedmax.md @@ -0,0 +1,105 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/exponentialTimeDecayedMax +sidebar_position: 135 +title: exponentialTimeDecayedMax +--- + +## exponentialTimeDecayedMax + +Returns the maximum of the computed exponentially smoothed moving average at index `t` in time with that at `t-1`. 
+ +**Syntax** + +```sql +exponentialTimeDecayedMax(x)(value, timeunit) +``` + +**Arguments** + +- `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md), [DateTime](../../data-types/datetime.md), [DateTime64](../../data-types/datetime64.md). + +**Parameters** + +- `x` — Half-life period. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). + +**Returned values** + +- Returns the maximum of the exponentially smoothed weighted moving average at `t` and `t-1`. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT + value, + time, + round(exp_smooth, 3), + bar(exp_smooth, 0, 5, 50) AS bar +FROM + ( + SELECT + (number = 0) OR (number >= 25) AS value, + number AS time, + exponentialTimeDecayedMax(10)(value, time) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS exp_smooth + FROM numbers(50) + ); +``` + +Result: + +```response + ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar────────┐ + 1. │ 1 │ 0 │ 1 │ ██████████ │ + 2. │ 0 │ 1 │ 0.905 │ █████████ │ + 3. │ 0 │ 2 │ 0.819 │ ████████▏ │ + 4. │ 0 │ 3 │ 0.741 │ ███████▍ │ + 5. │ 0 │ 4 │ 0.67 │ ██████▋ │ + 6. │ 0 │ 5 │ 0.607 │ ██████ │ + 7. │ 0 │ 6 │ 0.549 │ █████▍ │ + 8. │ 0 │ 7 │ 0.497 │ ████▉ │ + 9. │ 0 │ 8 │ 0.449 │ ████▍ │ +10. │ 0 │ 9 │ 0.407 │ ████ │ +11. │ 0 │ 10 │ 0.368 │ ███▋ │ +12. │ 0 │ 11 │ 0.333 │ ███▎ │ +13. │ 0 │ 12 │ 0.301 │ ███ │ +14. │ 0 │ 13 │ 0.273 │ ██▋ │ +15. │ 0 │ 14 │ 0.247 │ ██▍ │ +16. │ 0 │ 15 │ 0.223 │ ██▏ │ +17. │ 0 │ 16 │ 0.202 │ ██ │ +18. │ 0 │ 17 │ 0.183 │ █▊ │ +19. │ 0 │ 18 │ 0.165 │ █▋ │ +20. │ 0 │ 19 │ 0.15 │ █▍ │ +21. 
│ 0 │ 20 │ 0.135 │ █▎ │ +22. │ 0 │ 21 │ 0.122 │ █▏ │ +23. │ 0 │ 22 │ 0.111 │ █ │ +24. │ 0 │ 23 │ 0.1 │ █ │ +25. │ 0 │ 24 │ 0.091 │ ▉ │ +26. │ 1 │ 25 │ 1 │ ██████████ │ +27. │ 1 │ 26 │ 1 │ ██████████ │ +28. │ 1 │ 27 │ 1 │ ██████████ │ +29. │ 1 │ 28 │ 1 │ ██████████ │ +30. │ 1 │ 29 │ 1 │ ██████████ │ +31. │ 1 │ 30 │ 1 │ ██████████ │ +32. │ 1 │ 31 │ 1 │ ██████████ │ +33. │ 1 │ 32 │ 1 │ ██████████ │ +34. │ 1 │ 33 │ 1 │ ██████████ │ +35. │ 1 │ 34 │ 1 │ ██████████ │ +36. │ 1 │ 35 │ 1 │ ██████████ │ +37. │ 1 │ 36 │ 1 │ ██████████ │ +38. │ 1 │ 37 │ 1 │ ██████████ │ +39. │ 1 │ 38 │ 1 │ ██████████ │ +40. │ 1 │ 39 │ 1 │ ██████████ │ +41. │ 1 │ 40 │ 1 │ ██████████ │ +42. │ 1 │ 41 │ 1 │ ██████████ │ +43. │ 1 │ 42 │ 1 │ ██████████ │ +44. │ 1 │ 43 │ 1 │ ██████████ │ +45. │ 1 │ 44 │ 1 │ ██████████ │ +46. │ 1 │ 45 │ 1 │ ██████████ │ +47. │ 1 │ 46 │ 1 │ ██████████ │ +48. │ 1 │ 47 │ 1 │ ██████████ │ +49. │ 1 │ 48 │ 1 │ ██████████ │ +50. │ 1 │ 49 │ 1 │ ██████████ │ + └───────┴──────┴──────────────────────┴────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedsum.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedsum.md new file mode 100644 index 00000000000..617cd265dac --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedsum.md @@ -0,0 +1,105 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/exponentialTimeDecayedSum +sidebar_position: 136 +title: exponentialTimeDecayedSum +--- + +## exponentialTimeDecayedSum + +Returns the sum of exponentially smoothed moving average values of a time series at the index `t` in time. + +**Syntax** + +```sql +exponentialTimeDecayedSum(x)(v, t) +``` + +**Arguments** + +- `v` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `t` — Time. 
[Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md), [DateTime](../../data-types/datetime.md), [DateTime64](../../data-types/datetime64.md). + +**Parameters** + +- `x` — Half-life period. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). + +**Returned values** + +- Returns the sum of exponentially smoothed moving average values at the given point in time. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT + value, + time, + round(exp_smooth, 3), + bar(exp_smooth, 0, 10, 50) AS bar +FROM + ( + SELECT + (number = 0) OR (number >= 25) AS value, + number AS time, + exponentialTimeDecayedSum(10)(value, time) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS exp_smooth + FROM numbers(50) + ); +``` + +Result: + +```response + ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar───────────────────────────────────────────────┐ + 1. │ 1 │ 0 │ 1 │ █████ │ + 2. │ 0 │ 1 │ 0.905 │ ████▌ │ + 3. │ 0 │ 2 │ 0.819 │ ████ │ + 4. │ 0 │ 3 │ 0.741 │ ███▋ │ + 5. │ 0 │ 4 │ 0.67 │ ███▎ │ + 6. │ 0 │ 5 │ 0.607 │ ███ │ + 7. │ 0 │ 6 │ 0.549 │ ██▋ │ + 8. │ 0 │ 7 │ 0.497 │ ██▍ │ + 9. │ 0 │ 8 │ 0.449 │ ██▏ │ +10. │ 0 │ 9 │ 0.407 │ ██ │ +11. │ 0 │ 10 │ 0.368 │ █▊ │ +12. │ 0 │ 11 │ 0.333 │ █▋ │ +13. │ 0 │ 12 │ 0.301 │ █▌ │ +14. │ 0 │ 13 │ 0.273 │ █▎ │ +15. │ 0 │ 14 │ 0.247 │ █▏ │ +16. │ 0 │ 15 │ 0.223 │ █ │ +17. │ 0 │ 16 │ 0.202 │ █ │ +18. │ 0 │ 17 │ 0.183 │ ▉ │ +19. │ 0 │ 18 │ 0.165 │ ▊ │ +20. │ 0 │ 19 │ 0.15 │ ▋ │ +21. │ 0 │ 20 │ 0.135 │ ▋ │ +22. │ 0 │ 21 │ 0.122 │ ▌ │ +23. │ 0 │ 22 │ 0.111 │ ▌ │ +24. │ 0 │ 23 │ 0.1 │ ▌ │ +25. │ 0 │ 24 │ 0.091 │ ▍ │ +26. │ 1 │ 25 │ 1.082 │ █████▍ │ +27. │ 1 │ 26 │ 1.979 │ █████████▉ │ +28. │ 1 │ 27 │ 2.791 │ █████████████▉ │ +29. │ 1 │ 28 │ 3.525 │ █████████████████▋ │ +30. 
│ 1 │ 29 │ 4.19 │ ████████████████████▉ │ +31. │ 1 │ 30 │ 4.791 │ ███████████████████████▉ │ +32. │ 1 │ 31 │ 5.335 │ ██████████████████████████▋ │ +33. │ 1 │ 32 │ 5.827 │ █████████████████████████████▏ │ +34. │ 1 │ 33 │ 6.273 │ ███████████████████████████████▎ │ +35. │ 1 │ 34 │ 6.676 │ █████████████████████████████████▍ │ +36. │ 1 │ 35 │ 7.041 │ ███████████████████████████████████▏ │ +37. │ 1 │ 36 │ 7.371 │ ████████████████████████████████████▊ │ +38. │ 1 │ 37 │ 7.669 │ ██████████████████████████████████████▎ │ +39. │ 1 │ 38 │ 7.939 │ ███████████████████████████████████████▋ │ +40. │ 1 │ 39 │ 8.184 │ ████████████████████████████████████████▉ │ +41. │ 1 │ 40 │ 8.405 │ ██████████████████████████████████████████ │ +42. │ 1 │ 41 │ 8.605 │ ███████████████████████████████████████████ │ +43. │ 1 │ 42 │ 8.786 │ ███████████████████████████████████████████▉ │ +44. │ 1 │ 43 │ 8.95 │ ████████████████████████████████████████████▊ │ +45. │ 1 │ 44 │ 9.098 │ █████████████████████████████████████████████▍ │ +46. │ 1 │ 45 │ 9.233 │ ██████████████████████████████████████████████▏ │ +47. │ 1 │ 46 │ 9.354 │ ██████████████████████████████████████████████▊ │ +48. │ 1 │ 47 │ 9.464 │ ███████████████████████████████████████████████▎ │ +49. │ 1 │ 48 │ 9.563 │ ███████████████████████████████████████████████▊ │ +50. 
│ 1 │ 49 │ 9.653 │ ████████████████████████████████████████████████▎ │ + └───────┴──────┴──────────────────────┴───────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/first_value.md b/docs/en/sql-reference/aggregate-functions/reference/first_value.md index 0c26b66c64a..2cd0e1fa16f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/first_value.md +++ b/docs/en/sql-reference/aggregate-functions/reference/first_value.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/first_value -sidebar_position: 7 +sidebar_position: 137 --- # first_value diff --git a/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md b/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md new file mode 100644 index 00000000000..4abb3e03226 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md @@ -0,0 +1,95 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/flame_graph +sidebar_position: 138 +--- + +# flameGraph + +Aggregate function which builds a [flamegraph](https://www.brendangregg.com/flamegraphs.html) using the list of stacktraces. Outputs an array of strings which can be used by [flamegraph.pl utility](https://github.com/brendangregg/FlameGraph) to render an SVG of the flamegraph. + +## Syntax + +```sql +flameGraph(traces, [size], [ptr]) +``` + +## Parameters + +- `traces` — a stacktrace. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). +- `size` — an allocation size for memory profiling. (optional - default `1`). [UInt64](../../data-types/int-uint.md). +- `ptr` — an allocation address. (optional - default `0`). [UInt64](../../data-types/int-uint.md). + +:::note +In the case where `ptr != 0`, a flameGraph will map allocations (size > 0) and deallocations (size < 0) with the same size and ptr. +Only allocations which were not freed are shown. 
Non-mapped deallocations are ignored. +::: + +## Returned value + +- An array of strings for use with [flamegraph.pl utility](https://github.com/brendangregg/FlameGraph). [Array](../../data-types/array.md)([String](../../data-types/string.md)). + +## Examples + +### Building a flamegraph based on a CPU query profiler + +```sql +SET query_profiler_cpu_time_period_ns=10000000; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +``` + +```text +clickhouse client --allow_introspection_functions=1 -q "select arrayJoin(flameGraph(arrayReverse(trace))) from system.trace_log where trace_type = 'CPU' and query_id = 'xxx'" | ~/dev/FlameGraph/flamegraph.pl > flame_cpu.svg +``` + +### Building a flamegraph based on a memory query profiler, showing all allocations + +```sql +SET memory_profiler_sample_probability=1, max_untracked_memory=1; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +``` + +```text +clickhouse client --allow_introspection_functions=1 -q "select arrayJoin(flameGraph(trace, size)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem.svg +``` + +### Building a flamegraph based on a memory query profiler, showing allocations which were not deallocated in query context + +```sql +SET memory_profiler_sample_probability=1, max_untracked_memory=1, use_uncompressed_cache=1, merge_tree_max_rows_to_use_cache=100000000000, merge_tree_max_bytes_to_use_cache=1000000000000; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +``` + +```text +clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, size, ptr)) FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx'" | 
~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_untracked.svg +``` + +### Building a flamegraph based on a memory query profiler, showing active allocations at a fixed point in time + +```sql +SET memory_profiler_sample_probability=1, max_untracked_memory=1; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +``` + +- 1 - Memory usage per second + +```sql +SELECT event_time, m, formatReadableSize(max(s) as m) FROM (SELECT event_time, sum(size) OVER (ORDER BY event_time) AS s FROM system.trace_log WHERE query_id = 'xxx' AND trace_type = 'MemorySample') GROUP BY event_time ORDER BY event_time; +``` + +- 2 - Find a time point with maximal memory usage + +```sql +SELECT argMax(event_time, s), max(s) FROM (SELECT event_time, sum(size) OVER (ORDER BY event_time) AS s FROM system.trace_log WHERE query_id = 'xxx' AND trace_type = 'MemorySample'); +``` + +- 3 - Fix active allocations at a fixed point in time + +```text +clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, size, ptr)) FROM (SELECT * FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx' AND event_time <= 'yyy' ORDER BY event_time)" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_pos.svg +``` + +- 4 - Find deallocations at a fixed point in time + +```text +clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, -size, ptr)) FROM (SELECT * FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx' AND event_time > 'yyy' ORDER BY event_time desc)" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_neg.svg +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md index a38e35a72ad..1a87e3aeba9 100644 --- 
a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparray -sidebar_position: 110 +sidebar_position: 139 --- # groupArray diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md index d745e8a0e7a..c6b23c2f808 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparrayinsertat -sidebar_position: 112 +sidebar_position: 140 --- # groupArrayInsertAt diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md index 5cac88be073..a370f595923 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparrayintersect -sidebar_position: 115 +sidebar_position: 141 --- # groupArrayIntersect diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md index 9b48ee54ecd..ff62dcdde9b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparraylast -sidebar_position: 110 +sidebar_position: 142 --- # groupArrayLast diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md 
b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md index 32c0608afeb..6b6c4830535 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparraymovingavg -sidebar_position: 114 +sidebar_position: 143 --- # groupArrayMovingAvg diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md index 6f2a60dd080..d1fa6fce9b0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparraymovingsum -sidebar_position: 113 +sidebar_position: 144 --- # groupArrayMovingSum diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md index 393087161df..38ddae48ee7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparraysample -sidebar_position: 114 +sidebar_position: 145 --- # groupArraySample diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md index 9bee0c29e7a..22a150bb8fb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md @@ -1,6 +1,7 @@ - --- - toc_priority: 112 - --- +--- +slug: /en/sql-reference/aggregate-functions/reference/grouparraysorted +sidebar_position: 146 +--- # 
groupArraySorted {#groupArraySorted} diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md index 3d833555a43..eee383d84e9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitand -sidebar_position: 125 +sidebar_position: 147 --- # groupBitAnd diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md index 02b9e0e8821..23b686e29b2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitmap -sidebar_position: 128 +sidebar_position: 148 --- # groupBitmap diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md index 1e649645e75..77bbf7d3d2c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitmapand -sidebar_position: 129 +sidebar_position: 149 title: groupBitmapAnd --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md index c88c80ceff2..7bb3dc689e8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitmapor -sidebar_position: 130 +sidebar_position: 150 title: 
groupBitmapOr --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md index aa24b3d2128..3212e94a47b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitmapxor -sidebar_position: 131 +sidebar_position: 151 title: groupBitmapXor --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md index 138ee998405..802b839d56e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitor -sidebar_position: 126 +sidebar_position: 152 --- # groupBitOr diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md index 168335a010c..94891891d64 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitxor -sidebar_position: 127 +sidebar_position: 153 --- # groupBitXor diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md b/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md index fe5f714c307..0462f4a4ab2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupuniqarray -sidebar_position: 111 +sidebar_position: 154 --- # groupUniqArray diff --git 
a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index a56b1c97681..e3725b6a430 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -58,6 +58,7 @@ ClickHouse-specific aggregate functions: - [topKWeighted](../reference/topkweighted.md) - [deltaSum](../reference/deltasum.md) - [deltaSumTimestamp](../reference/deltasumtimestamp.md) +- [flameGraph](../reference/flame_graph.md) - [groupArray](../reference/grouparray.md) - [groupArrayLast](../reference/grouparraylast.md) - [groupUniqArray](../reference/groupuniqarray.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md b/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md index 5990345b765..66e23a716ba 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/intervalLengthSum -sidebar_position: 146 +sidebar_position: 155 sidebar_label: intervalLengthSum title: intervalLengthSum --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md b/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md index d159eec7ce6..33afcdfbf38 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest -sidebar_position: 300 +sidebar_position: 156 sidebar_label: kolmogorovSmirnovTest --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md index e1a29973fcf..c543831addc 100644 --- 
a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/kurtpop -sidebar_position: 153 +sidebar_position: 157 --- # kurtPop diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md index 911c2bfbe74..57e80729454 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/kurtsamp -sidebar_position: 154 +sidebar_position: 158 --- # kurtSamp diff --git a/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md b/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md index 06443994dd9..673f3cb69c7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md +++ b/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets -sidebar_position: 312 +sidebar_position: 159 sidebar_label: largestTriangleThreeBuckets --- @@ -24,6 +24,8 @@ Alias: `lttb`. - `x` — x coordinate. [Integer](../../../sql-reference/data-types/int-uint.md) , [Float](../../../sql-reference/data-types/float.md) , [Decimal](../../../sql-reference/data-types/decimal.md) , [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md). - `y` — y coordinate. 
[Integer](../../../sql-reference/data-types/int-uint.md) , [Float](../../../sql-reference/data-types/float.md) , [Decimal](../../../sql-reference/data-types/decimal.md) , [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md). +NaNs are ignored in the provided series, meaning that any NaN values will be excluded from the analysis. This ensures that the function operates only on valid numerical data. + **Parameters** - `n` — number of points in the resulting series. [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -61,7 +63,7 @@ Result: ``` text ┌────────largestTriangleThreeBuckets(4)(x, y)───────────┐ -│ [(1,10),(3,15),(5,40),(10,70)] │ +│ [(1,10),(3,15),(9,55),(10,70)] │ └───────────────────────────────────────────────────────┘ ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/last_value.md b/docs/en/sql-reference/aggregate-functions/reference/last_value.md index 21a86a5f130..b2aa5c86d81 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/last_value.md +++ b/docs/en/sql-reference/aggregate-functions/reference/last_value.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/last_value -sidebar_position: 8 +sidebar_position: 160 --- # last_value diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md index af744f445d9..17f6afecde2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/mannwhitneyutest -sidebar_position: 310 +sidebar_position: 161 sidebar_label: mannWhitneyUTest --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/max.md 
b/docs/en/sql-reference/aggregate-functions/reference/max.md index 4bb2145d683..12c8800ef7f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/max.md +++ b/docs/en/sql-reference/aggregate-functions/reference/max.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/max -sidebar_position: 3 +sidebar_position: 162 title: max --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md b/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md index db99b900a3e..c65e31114ff 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md +++ b/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/maxintersections -sidebar_position: 360 +sidebar_position: 163 title: maxIntersections --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md b/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md index 7dd63f09316..d5c2b0bd3c2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md +++ b/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/maxintersectionsposition -sidebar_position: 361 +sidebar_position: 164 title: maxIntersectionsPosition --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md index 4d8c67e1b90..c9c6913249c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/maxmap -sidebar_position: 143 +sidebar_position: 165 --- # maxMap diff --git a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md 
b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md index 1cf2bebf26f..19afb5ae742 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/meanztest -sidebar_position: 303 +sidebar_position: 166 sidebar_label: meanZTest --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md index 2a166c83dad..dcf174254ac 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/median.md +++ b/docs/en/sql-reference/aggregate-functions/reference/median.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/median -sidebar_position: 212 +sidebar_position: 167 --- # median diff --git a/docs/en/sql-reference/aggregate-functions/reference/min.md b/docs/en/sql-reference/aggregate-functions/reference/min.md index cca515b76e8..6bfcaf020c8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/min.md +++ b/docs/en/sql-reference/aggregate-functions/reference/min.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/min -sidebar_position: 2 +sidebar_position: 168 title: min --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/minmap.md b/docs/en/sql-reference/aggregate-functions/reference/minmap.md index 5436e1fc6a6..b1fbb9e49f3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/minmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/minmap.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/minmap -sidebar_position: 142 +sidebar_position: 169 --- # minMap diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantile.md b/docs/en/sql-reference/aggregate-functions/reference/quantile.md index 91b6b1b0d80..d5278125cbc 100644 --- 
a/docs/en/sql-reference/aggregate-functions/reference/quantile.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantile.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantile -sidebar_position: 200 +sidebar_position: 170 --- # quantile diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md b/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md index 7352781d126..9582f264a6f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileGK -sidebar_position: 204 +sidebar_position: 175 --- # quantileGK diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md b/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md index 4377f2f1b17..4469438db6a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantilebfloat16 -sidebar_position: 209 +sidebar_position: 171 title: quantileBFloat16 --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md b/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md index f9acd2e20cb..fc9db7ef08d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileddsketch -sidebar_position: 211 +sidebar_position: 171 title: quantileDD --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md index 7235c47da70..0ac4b5e3a51 100644 --- 
a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiledeterministic -sidebar_position: 206 +sidebar_position: 172 --- # quantileDeterministic diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md index d7d7413c283..46873bcd2b6 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileexact -sidebar_position: 202 +sidebar_position: 173 --- # quantileExact Functions diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md index 34def8d7411..4ce212888c4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileexactweighted -sidebar_position: 203 +sidebar_position: 174 --- # quantileExactWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md index 41d2627fb7b..9eb4fde6102 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileInterpolatedWeighted -sidebar_position: 203 +sidebar_position: 176 --- # quantileInterpolatedWeighted diff --git 
a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index 856d447ac13..e2c3295221d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiles -sidebar_position: 201 +sidebar_position: 177 --- # quantiles Functions diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md index 796e87b02d8..ece54ca24ab 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiletdigest -sidebar_position: 207 +sidebar_position: 178 --- # quantileTDigest diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index b3e21e0e69e..7f8f7f53a97 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiletdigestweighted -sidebar_position: 208 +sidebar_position: 179 --- # quantileTDigestWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md index b5b1c8a0c01..78050fe5b5e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiletiming -sidebar_position: 204 
+sidebar_position: 180 --- # quantileTiming diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md index df483aac01e..c5fff0825c3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiletimingweighted -sidebar_position: 205 +sidebar_position: 181 --- # quantileTimingWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md index 27f2dd124e4..eb995923d97 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/rankCorr -sidebar_position: 145 +sidebar_position: 182 --- # rankCorr diff --git a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md index ea3dbff8691..2aebccfdc53 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/simplelinearregression -sidebar_position: 220 +sidebar_position: 183 --- # simpleLinearRegression diff --git a/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md b/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md index e39af77059a..21344b58ba6 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md +++ b/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md @@ -1,6 +1,6 @@ --- slug: 
/en/sql-reference/aggregate-functions/reference/singlevalueornull -sidebar_position: 220 +sidebar_position: 184 --- # singleValueOrNull diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md index 379fdcfa7c2..58ea33edb81 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/skewpop -sidebar_position: 150 +sidebar_position: 185 --- # skewPop diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md index 9e64b186db3..9c32a0183ef 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/skewsamp -sidebar_position: 151 +sidebar_position: 186 --- # skewSamp diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md index 62edc221858..8791847ead0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/sparkbar -sidebar_position: 311 +sidebar_position: 187 sidebar_label: sparkbar --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md index d2406197ecc..e52a442d76a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stddevpop -sidebar_position: 30 +sidebar_position: 188 --- # 
stddevPop @@ -25,7 +25,7 @@ stddevPop(x) **Returned value** -Square root of standard deviation of `x`. [Float64](../../data-types/float.md). +- Square root of the variance of `x` (i.e. the population standard deviation). [Float64](../../data-types/float.md). **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md b/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md index a8ad5956ae8..2051ce7b125 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stddevpopstable -sidebar_position: 30 +sidebar_position: 189 --- # stddevPopStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md b/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md index cf8b9b20d63..e2cad40b267 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stddevsamp -sidebar_position: 31 +sidebar_position: 190 --- # stddevSamp diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md b/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md index 9ae1f5f8411..205e10cced5 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stddevsampstable -sidebar_position: 31 +sidebar_position: 191 --- # stddevSampStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md index ddac82a0977..6cc5cbd8fe1 100644 --- 
a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md @@ -1,9 +1,9 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stochasticlinearregression -sidebar_position: 221 +sidebar_position: 192 --- -# stochasticLinearRegression +# stochasticLinearRegression {#agg_functions_stochasticlinearregression_parameters} This function implements stochastic linear regression. It supports custom parameters for learning rate, L2 regularization coefficient, mini-batch size, and has a few methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), and [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)). @@ -72,5 +72,5 @@ The query will return a column of predicted values. 
Note that first argument of **See Also** -- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) +- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#stochasticlogisticregression) - [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md index 0a040689681..dca452a1702 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stochasticlogisticregression -sidebar_position: 222 +sidebar_position: 193 --- # stochasticLogisticRegression @@ -11,7 +11,7 @@ This function implements stochastic logistic regression. It can be used for bina Parameters are exactly the same as in stochasticLinearRegression: `learning rate`, `l2 regularization coefficient`, `mini-batch size`, `method for updating weights`. -For more information see [parameters](#agg_functions-stochasticlinearregression-parameters). +For more information see [parameters](../reference/stochasticlinearregression.md/#parameters). 
``` text stochasticLogisticRegression(1.0, 1.0, 10, 'SGD') diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md index fa320b4e336..1605e8efa13 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/studentttest -sidebar_position: 300 +sidebar_position: 194 sidebar_label: studentTTest --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/sum.md b/docs/en/sql-reference/aggregate-functions/reference/sum.md index a33a99f63e6..19636f003c7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/sum -sidebar_position: 4 +sidebar_position: 195 --- # sum diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md index a59b87022d6..ff4ddcec142 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/sumcount -sidebar_position: 144 +sidebar_position: 196 title: sumCount --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md b/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md index 1a729b18b42..ed58b3c3369 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/sumkahan -sidebar_position: 145 +sidebar_position: 197 title: sumKahan --- diff --git 
a/docs/en/sql-reference/aggregate-functions/reference/summap.md b/docs/en/sql-reference/aggregate-functions/reference/summap.md index fd3f095511b..4ff937f1e4f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/summap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/summap.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/summap -sidebar_position: 141 +sidebar_position: 198 --- # sumMap diff --git a/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md b/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md index 7c0aa31e459..e36818e2ab8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md +++ b/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/summapwithoverflow -sidebar_position: 141 +sidebar_position: 199 --- # sumMapWithOverflow diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md b/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md index a120eafe738..5fe3cb7de8e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/sumwithoverflow -sidebar_position: 140 +sidebar_position: 200 --- # sumWithOverflow diff --git a/docs/en/sql-reference/aggregate-functions/reference/theilsu.md b/docs/en/sql-reference/aggregate-functions/reference/theilsu.md index ef19438a53a..73b063cf965 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/theilsu.md +++ b/docs/en/sql-reference/aggregate-functions/reference/theilsu.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/theilsu -sidebar_position: 353 +sidebar_position: 201 --- # theilsU diff --git 
a/docs/en/sql-reference/aggregate-functions/reference/topk.md b/docs/en/sql-reference/aggregate-functions/reference/topk.md index dd4b2251a8a..695e9b1d7d8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topk.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topk.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/topk -sidebar_position: 108 +sidebar_position: 202 --- # topK diff --git a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md index d2a469828fc..148a8b6ea18 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/topkweighted -sidebar_position: 109 +sidebar_position: 203 --- # topKWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniq.md b/docs/en/sql-reference/aggregate-functions/reference/uniq.md index b1c8336630b..c1dc6a29e58 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniq.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniq.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniq -sidebar_position: 190 +sidebar_position: 204 --- # uniq diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index 18f44d2fcc4..70bb4463140 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqcombined -sidebar_position: 192 +sidebar_position: 205 --- # uniqCombined diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md 
b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md index b6e09bcaae3..014984f6291 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqcombined64 -sidebar_position: 193 +sidebar_position: 206 --- # uniqCombined64 diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md index fd68a464881..da4d4aa9588 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqexact -sidebar_position: 191 +sidebar_position: 207 --- # uniqExact diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md index 8594ebb3782..78d84edf1be 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqhll12 -sidebar_position: 194 +sidebar_position: 208 --- # uniqHLL12 diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md b/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md index 45970f144cb..fbae42117ee 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqthetasketch -sidebar_position: 195 +sidebar_position: 209 title: uniqTheta --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpop.md 
b/docs/en/sql-reference/aggregate-functions/reference/varpop.md index fcabeb4c6a8..182e830f19f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varpop.md @@ -1,33 +1,28 @@ --- title: "varPop" -slug: "/en/sql-reference/aggregate-functions/reference/varpop" -sidebar_position: 32 +slug: "/en/sql-reference/aggregate-functions/reference/varPop" +sidebar_position: 210 --- -This page covers the `varPop` and `varPopStable` functions available in ClickHouse. - ## varPop -Calculates the population covariance between two data columns. The population covariance measures the degree to which two variables vary together. Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`. +Calculates the population variance. **Syntax** ```sql -covarPop(x, y) +varPop(x) ``` +Alias: `VAR_POP`. + **Parameters** -- `x`: The first data column. [Numeric](../../../native-protocol/columns.md) -- `y`: The second data column. [Numeric](../../../native-protocol/columns.md) +- `x`: Population of values to find the population variance of. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). **Returned value** -Returns an integer of type `Float64`. - -**Implementation details** - -This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable). +- Returns the population variance of `x`. [`Float64`](../../data-types/float.md). 
**Example** @@ -37,69 +32,21 @@ Query: DROP TABLE IF EXISTS test_data; CREATE TABLE test_data ( - x Int32, - y Int32 + x UInt8, ) ENGINE = Memory; -INSERT INTO test_data VALUES (1, 2), (2, 3), (3, 5), (4, 6), (5, 8); +INSERT INTO test_data VALUES (3), (3), (3), (4), (4), (5), (5), (7), (11), (15); SELECT - covarPop(x, y) AS covar_pop + varPop(x) AS var_pop FROM test_data; ``` Result: ```response -3 -``` - -## varPopStable - -Calculates population covariance between two data columns using a stable, numerically accurate method to calculate the variance. This function is designed to provide reliable results even with large datasets or values that might cause numerical instability in other implementations. - -**Syntax** - -```sql -covarPopStable(x, y) -``` - -**Parameters** - -- `x`: The first data column. [String literal](../../syntax#syntax-string-literal) -- `y`: The second data column. [Expression](../../syntax#syntax-expressions) - -**Returned value** - -Returns an integer of type `Float64`. - -**Implementation details** - -Unlike [`varPop()`](#varPop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations. 
- -**Example** - -Query: - -```sql -DROP TABLE IF EXISTS test_data; -CREATE TABLE test_data -( - x Int32, - y Int32 -) -ENGINE = Memory; - -INSERT INTO test_data VALUES (1, 2), (2, 9), (9, 5), (4, 6), (5, 8); - -SELECT - covarPopStable(x, y) AS covar_pop_stable -FROM test_data; -``` - -Result: - -```response -0.5999999999999999 +┌─var_pop─┐ +│ 14.4 │ +└─────────┘ ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpopstable.md b/docs/en/sql-reference/aggregate-functions/reference/varpopstable.md new file mode 100644 index 00000000000..68037a5a533 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/varpopstable.md @@ -0,0 +1,52 @@ +--- +title: "varPopStable" +slug: "/en/sql-reference/aggregate-functions/reference/varpopstable" +sidebar_position: 211 +--- + +## varPopStable + +Returns the population variance. Unlike [`varPop`](../reference/varpop.md), this function uses a [numerically stable](https://en.wikipedia.org/wiki/Numerical_stability) algorithm. It works slower but provides a lower computational error. + +**Syntax** + +```sql +varPopStable(x) +``` + +Alias: `VAR_POP_STABLE`. + +**Parameters** + +- `x`: Population of values to find the population variance of. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). + +**Returned value** + +- Returns the population variance of `x`. [Float64](../../data-types/float.md). 
+ +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data +( + x UInt8, +) +ENGINE = Memory; + +INSERT INTO test_data VALUES (3),(3),(3),(4),(4),(5),(5),(7),(11),(15); + +SELECT + varPopStable(x) AS var_pop_stable +FROM test_data; +``` + +Result: + +```response +┌─var_pop_stable─┐ +│ 14.4 │ +└────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md index be669a16ae8..87a97c15dd8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md @@ -1,11 +1,9 @@ --- title: "varSamp" -slug: /en/sql-reference/aggregate-functions/reference/varsamp -sidebar_position: 33 +slug: /en/sql-reference/aggregate-functions/reference/varSamp +sidebar_position: 212 --- -This page contains information on the `varSamp` and `varSampStable` ClickHouse functions. - ## varSamp Calculate the sample variance of a data set. @@ -13,24 +11,27 @@ Calculate the sample variance of a data set. **Syntax** ```sql -varSamp(expr) +varSamp(x) ``` +Alias: `VAR_SAMP`. + **Parameters** -- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions) +- `x`: The population for which you want to calculate the sample variance. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). **Returned value** -Returns a Float64 value representing the sample variance of the input data set. + +- Returns the sample variance of the input data set `x`. [Float64](../../data-types/float.md). 
**Implementation details** -The `varSamp()` function calculates the sample variance using the following formula: +The `varSamp` function calculates the sample variance using the following formula: -```plaintext -∑(x - mean(x))^2 / (n - 1) -``` +$$ +\sum\frac{(x - \text{mean}(x))^2}{(n - 1)} +$$ Where: @@ -38,91 +39,29 @@ Where: - `mean(x)` is the arithmetic mean of the data set. - `n` is the number of data points in the data set. -The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead. - -This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable` function](#varSampStable). +The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use [`varPop`](../reference/varpop.md) instead. **Example** Query: ```sql -CREATE TABLE example_table +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data ( - id UInt64, - value Float64 + x Float64 ) -ENGINE = MergeTree -ORDER BY id; +ENGINE = Memory; -INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7); +INSERT INTO test_data VALUES (10.5), (12.3), (9.8), (11.2), (10.7); -SELECT varSamp(value) FROM example_table; +SELECT round(varSamp(x),3) AS var_samp FROM test_data; ``` Response: ```response -0.8650000000000091 +┌─var_samp─┐ +│ 0.865 │ +└──────────┘ ``` - -## varSampStable - -Calculate the sample variance of a data set using a numerically stable algorithm. - -**Syntax** - -```sql -varSampStable(expr) -``` - -**Parameters** - -- `expr`: An expression representing the data set for which you want to calculate the sample variance. 
[Expression](../../syntax#syntax-expressions) - -**Returned value** - -The `varSampStable()` function returns a Float64 value representing the sample variance of the input data set. - -**Implementation details** - -The `varSampStable()` function calculates the sample variance using the same formula as the [`varSamp()`](#varSamp function): - -```plaintext -∑(x - mean(x))^2 / (n - 1) -``` - -Where: -- `x` is each individual data point in the data set. -- `mean(x)` is the arithmetic mean of the data set. -- `n` is the number of data points in the data set. - -The difference between `varSampStable()` and `varSamp()` is that `varSampStable()` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values. - -Like `varSamp()`, the `varSampStable()` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable()` function](./varpop#varpopstable) instead. - -**Example** - -Query: - -```sql -CREATE TABLE example_table -( - id UInt64, - value Float64 -) -ENGINE = MergeTree -ORDER BY id; - -INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7); - -SELECT varSampStable(value) FROM example_table; -``` - -Response: - -```response -0.865 -``` - -This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp()` due to the more precise handling of floating-point arithmetic. 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsampstable.md b/docs/en/sql-reference/aggregate-functions/reference/varsampstable.md new file mode 100644 index 00000000000..ebe647e1951 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/varsampstable.md @@ -0,0 +1,63 @@ +--- +title: "varSampStable" +slug: /en/sql-reference/aggregate-functions/reference/varsampstable +sidebar_position: 213 +--- + +## varSampStable + +Calculate the sample variance of a data set. Unlike [`varSamp`](../reference/varsamp.md), this function uses a numerically stable algorithm. It works slower but provides a lower computational error. + +**Syntax** + +```sql +varSampStable(x) +``` + +Alias: `VAR_SAMP_STABLE`. + +**Parameters** + +- `x`: The population for which you want to calculate the sample variance. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). + +**Returned value** + +- Returns the sample variance of the input data set. [Float64](../../data-types/float.md). + +**Implementation details** + +The `varSampStable` function calculates the sample variance using the same formula as [`varSamp`](../reference/varsamp.md): + +$$ +\sum\frac{(x - \text{mean}(x))^2}{(n - 1)} +$$ + +Where: +- `x` is each individual data point in the data set. +- `mean(x)` is the arithmetic mean of the data set. +- `n` is the number of data points in the data set. 
+ +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data +( + x Float64 +) +ENGINE = Memory; + +INSERT INTO test_data VALUES (10.5), (12.3), (9.8), (11.2), (10.7); + +SELECT round(varSampStable(x),3) AS var_samp_stable FROM test_data; +``` + +Response: + +```response +┌─var_samp_stable─┐ +│ 0.865 │ +└─────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md index 4f1085e65b4..296b70f758e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/welchttest -sidebar_position: 301 +sidebar_position: 214 sidebar_label: welchTTest --- diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index ac9a72c2641..250e766f2b7 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -137,7 +137,7 @@ If the time transition (due to daylight saving time or for other reasons) was pe Non-monotonic calendar dates. For example, in Happy Valley - Goose Bay, the time was transitioned one hour backwards at 00:01:00 7 Nov 2010 (one minute after midnight). So after 6th Nov has ended, people observed a whole one minute of 7th Nov, then time was changed back to 23:01 6th Nov and after another 59 minutes the 7th Nov started again. ClickHouse does not (yet) support this kind of fun. During these days the results of time processing functions may be slightly incorrect. -Similar issue exists for Casey Antarctic station in year 2010. They changed time three hours back at 5 Mar, 02:00. If you are working in antarctic station, please don't afraid to use ClickHouse. Just make sure you set timezone to UTC or be aware of inaccuracies. 
+A similar issue exists for the Casey Antarctic station in the year 2010. They changed the time three hours back on 5 Mar at 02:00. If you are working at an Antarctic station, please don't be afraid to use ClickHouse. Just make sure you set the timezone to UTC or be aware of inaccuracies. Time shifts for multiple days. Some pacific islands changed their timezone offset from UTC+14 to UTC-12. That's alright but some inaccuracies may present if you do calculations with their timezone for historical time points at the days of conversion. diff --git a/docs/en/sql-reference/data-types/float.md b/docs/en/sql-reference/data-types/float.md index 23131d5b4fe..3c789076c1e 100644 --- a/docs/en/sql-reference/data-types/float.md +++ b/docs/en/sql-reference/data-types/float.md @@ -7,33 +7,43 @@ sidebar_label: Float32, Float64 # Float32, Float64 :::note -If you need accurate calculations, in particular if you work with financial or business data requiring a high precision you should consider using Decimal instead. Floats might lead to inaccurate results as illustrated below: +If you need accurate calculations, in particular if you work with financial or business data requiring a high precision, you should consider using [Decimal](../data-types/decimal.md) instead. 
-``` +[Floating Point Numbers](https://en.wikipedia.org/wiki/IEEE_754) might lead to inaccurate results as illustrated below: + +```sql CREATE TABLE IF NOT EXISTS float_vs_decimal ( my_float Float64, my_decimal Decimal64(3) -)Engine=MergeTree ORDER BY tuple() - -INSERT INTO float_vs_decimal SELECT round(randCanonical(), 3) AS res, res FROM system.numbers LIMIT 1000000; # Generate 1 000 000 random number with 2 decimal places and store them as a float and as a decimal +) +Engine=MergeTree +ORDER BY tuple(); +# Generate 1 000 000 random numbers with 3 decimal places and store them as a float and as a decimal +INSERT INTO float_vs_decimal SELECT round(randCanonical(), 3) AS res, res FROM system.numbers LIMIT 1000000; +``` +``` SELECT sum(my_float), sum(my_decimal) FROM float_vs_decimal; -> 500279.56300000014 500279.563 + +┌──────sum(my_float)─┬─sum(my_decimal)─┐ +│ 499693.60500000004 │ 499693.605 │ +└────────────────────┴─────────────────┘ SELECT sumKahan(my_float), sumKahan(my_decimal) FROM float_vs_decimal; -> 500279.563 500279.563 + +┌─sumKahan(my_float)─┬─sumKahan(my_decimal)─┐ +│ 499693.605 │ 499693.605 │ +└────────────────────┴──────────────────────┘ ``` ::: -[Floating point numbers](https://en.wikipedia.org/wiki/IEEE_754). - -Types are equivalent to types of C: +The equivalent types in ClickHouse and in C are given below: - `Float32` — `float`. - `Float64` — `double`. -Aliases: +Float types in ClickHouse have the following aliases: - `Float32` — `FLOAT`, `REAL`, `SINGLE`. - `Float64` — `DOUBLE`, `DOUBLE PRECISION`. diff --git a/docs/en/sql-reference/data-types/geo.md b/docs/en/sql-reference/data-types/geo.md index 7e3c32b3451..7ffc7447d96 100644 --- a/docs/en/sql-reference/data-types/geo.md +++ b/docs/en/sql-reference/data-types/geo.md @@ -33,7 +33,7 @@ Result: ## Ring -`Ring` is a simple polygon without holes stored as an array of points: [Array](array.md)([Point](#point-data-type)). 
+`Ring` is a simple polygon without holes stored as an array of points: [Array](array.md)([Point](#point)). **Example** @@ -54,7 +54,7 @@ Result: ## Polygon -`Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring-data-type)). First element of outer array is the outer shape of polygon and all the following elements are holes. +`Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring)). First element of outer array is the outer shape of polygon and all the following elements are holes. **Example** @@ -76,7 +76,7 @@ Result: ## MultiPolygon -`MultiPolygon` consists of multiple polygons and is stored as an array of polygons: [Array](array.md)([Polygon](#polygon-data-type)). +`MultiPolygon` consists of multiple polygons and is stored as an array of polygons: [Array](array.md)([Polygon](#polygon)). **Example** diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index 39e37abad82..c29be2cff58 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -1,24 +1,20 @@ --- -slug: /en/sql-reference/data-types/json +slug: /en/sql-reference/data-types/object-data-type sidebar_position: 26 -sidebar_label: JSON +sidebar_label: Object Data Type +keywords: [object, data type] --- -# JSON +# Object Data Type :::note -This feature is experimental and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. +This feature is not production-ready and is now deprecated. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON objects is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864). ::: Stores JavaScript Object Notation (JSON) documents in a single column. 
`JSON` is an alias for `Object('json')`. -:::note -The JSON data type is an obsolete feature. Do not use it. -If you want to use it, set `allow_experimental_object_type = 1`. -::: - ## Example **Example 1** @@ -49,7 +45,7 @@ SELECT o.a, o.b.c, o.b.d[3] FROM json **Example 2** -To be able to create an ordered `MergeTree` family table the sorting key has to be extracted into its column. For example, to insert a file of compressed HTTP access logs in JSON format: +To be able to create an ordered `MergeTree` family table, the sorting key has to be extracted into its column. For example, to insert a file of compressed HTTP access logs in JSON format: ```sql CREATE TABLE logs @@ -69,7 +65,7 @@ FROM file('access.json.gz', JSONAsString) ## Displaying JSON columns -When displaying a `JSON` column ClickHouse only shows the field values by default (because internally, it is represented as a tuple). You can display the field names as well by setting `output_format_json_named_tuples_as_objects = 1`: +When displaying a `JSON` column, ClickHouse only shows the field values by default (because internally, it is represented as a tuple). 
You can also display the field names by setting `output_format_json_named_tuples_as_objects = 1`: ```sql SET output_format_json_named_tuples_as_objects = 1 @@ -83,4 +79,5 @@ SELECT * FROM json FORMAT JSONEachRow ## Related Content +- [Using JSON in ClickHouse](/docs/en/integrations/data-formats/json) - [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json) diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index 080de94f8b7..4c7421d57c0 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -16,7 +16,7 @@ ClickHouse supports special functions for working with dictionaries that can be ClickHouse supports: - Dictionaries with a [set of functions](../../sql-reference/functions/ext-dict-functions.md). -- [Embedded dictionaries](#embedded_dictionaries) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md). +- [Embedded dictionaries](#embedded-dictionaries) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md). :::tip Tutorial @@ -82,7 +82,7 @@ You can [configure](#configuring-a-dictionary) any number of dictionaries in the You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../sql-reference/functions/other-functions.md) function). This functionality is not related to dictionaries. ::: -## Configuring a Dictionary {#configuring-a-dictionary} +## Configuring a Dictionary @@ -123,7 +123,7 @@ LAYOUT(...) -- Memory layout configuration LIFETIME(...) -- Lifetime of dictionary in memory ``` -## Storing Dictionaries in Memory {#storing-dictionaries-in-memory} +## Storing Dictionaries in Memory There are a variety of ways to store dictionaries in memory. 
@@ -415,7 +415,7 @@ or LAYOUT(COMPLEX_KEY_HASHED_ARRAY([SHARDS 1])) ``` -### range_hashed {#range_hashed} +### range_hashed The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values. @@ -679,7 +679,7 @@ When searching for a dictionary, the cache is searched first. For each block of If keys are not found in dictionary, then update cache task is created and added into update queue. Update queue properties can be controlled with settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`. -For cache dictionaries, the expiration [lifetime](#dictionary-updates) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with setting `allow_read_expired_keys`. +For cache dictionaries, the expiration [lifetime](#refreshing-dictionary-data-using-lifetime) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with setting `allow_read_expired_keys`. This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the [system.dictionaries](../../operations/system-tables/dictionaries.md) table. @@ -899,7 +899,7 @@ Other types are not supported yet. The function returns the attribute for the pr Data must completely fit into RAM. 
-## Refreshing dictionary data using LIFETIME {#lifetime} +## Refreshing dictionary data using LIFETIME ClickHouse periodically updates dictionaries based on the `LIFETIME` tag (defined in seconds). `LIFETIME` is the update interval for fully downloaded dictionaries and the invalidation interval for cached dictionaries. @@ -1031,7 +1031,7 @@ SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15)) ... ``` -## Dictionary Sources {#dictionary-sources} +## Dictionary Sources @@ -1065,7 +1065,7 @@ SOURCE(SOURCE_TYPE(param1 val1 ... paramN valN)) -- Source configuration The source is configured in the `source` section. -For source types [Local file](#local_file), [Executable file](#executable), [HTTP(s)](#https), [ClickHouse](#clickhouse) +For source types [Local file](#local-file), [Executable file](#executable-file), [HTTP(s)](#https), [ClickHouse](#clickhouse) optional settings are available: ``` xml @@ -1089,10 +1089,10 @@ SETTINGS(format_csv_allow_single_quotes = 0) Types of sources (`source_type`): -- [Local file](#local_file) -- [Executable File](#executable) -- [Executable Pool](#executable_pool) -- [HTTP(S)](#http) +- [Local file](#local-file) +- [Executable File](#executable-file) +- [Executable Pool](#executable-pool) +- [HTTP(S)](#https) - DBMS - [ODBC](#odbc) - [MySQL](#mysql) @@ -1102,7 +1102,7 @@ Types of sources (`source_type`): - [Cassandra](#cassandra) - [PostgreSQL](#postgresql) -### Local File {#local_file} +### Local File Example of settings: @@ -1132,9 +1132,9 @@ When a dictionary with source `FILE` is created via DDL command (`CREATE DICTION - [Dictionary function](../../sql-reference/table-functions/dictionary.md#dictionary-function) -### Executable File {#executable} +### Executable File -Working with executable files depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). 
If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data. +Working with executable files depends on [how the dictionary is stored in memory](#storing-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data. Example of settings: @@ -1161,7 +1161,7 @@ Setting fields: That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node. -### Executable Pool {#executable_pool} +### Executable Pool Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary [is stored](#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts. @@ -1196,9 +1196,9 @@ Setting fields: That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node. -### HTTP(S) {#https} +### HTTP(S) -Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. +Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storing-dictionaries-in-memory). 
If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. Example of settings: @@ -1285,7 +1285,7 @@ Setting fields: - `db` – Name of the database. Omit it if the database name is set in the `` parameters. - `table` – Name of the table and schema if exists. - `connection_string` – Connection string. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime). - `query` – The custom query. Optional parameter. :::note @@ -1575,7 +1575,7 @@ Setting fields: - `where` – The selection criteria. The syntax for conditions is the same as for `WHERE` clause in MySQL, for example, `id > 10 AND id < 20`. Optional parameter. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime). - `fail_on_connection_loss` – The configuration parameter that controls behavior of the server on connection loss. If `true`, an exception is thrown immediately if the connection between client and server was lost. If `false`, the ClickHouse server retries to execute the query three times before throwing an exception. Note that retrying leads to increased response times. Default value: `false`. @@ -1672,7 +1672,7 @@ Setting fields: - `db` – Name of the database. - `table` – Name of the table. - `where` – The selection criteria. May be omitted. -- `invalidate_query` – Query for checking the dictionary status. 
Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime). - `secure` - Use ssl for connection. - `query` – The custom query. Optional parameter. @@ -1849,7 +1849,7 @@ Setting fields: - `db` – Name of the database. - `table` – Name of the table. - `where` – The selection criteria. The syntax for conditions is the same as for `WHERE` clause in PostgreSQL. For example, `id > 10 AND id < 20`. Optional parameter. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime). - `query` – The custom query. Optional parameter. :::note @@ -1873,7 +1873,7 @@ LAYOUT(FLAT()) LIFETIME(0); ``` -## Dictionary Key and Fields {#dictionary-key-and-fields} +## Dictionary Key and Fields @@ -1963,7 +1963,7 @@ PRIMARY KEY Id ### Composite Key -The key can be a `tuple` from any types of fields. The [layout](#storig-dictionaries-in-memory) in this case must be `complex_key_hashed` or `complex_key_cache`. +The key can be a `tuple` from any types of fields. The [layout](#storing-dictionaries-in-memory) in this case must be `complex_key_hashed` or `complex_key_cache`. :::tip A composite key can consist of a single element. This makes it possible to use a string as the key, for instance. 
@@ -2030,17 +2030,17 @@ CREATE DICTIONARY somename ( Configuration fields: -| Tag | Description | Required | -|------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| -| `name` | Column name. | Yes | -| `type` | ClickHouse data type: [UInt8](../../sql-reference/data-types/int-uint.md), [UInt16](../../sql-reference/data-types/int-uint.md), [UInt32](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md), [Int8](../../sql-reference/data-types/int-uint.md), [Int16](../../sql-reference/data-types/int-uint.md), [Int32](../../sql-reference/data-types/int-uint.md), [Int64](../../sql-reference/data-types/int-uint.md), [Float32](../../sql-reference/data-types/float.md), [Float64](../../sql-reference/data-types/float.md), [UUID](../../sql-reference/data-types/uuid.md), [Decimal32](../../sql-reference/data-types/decimal.md), [Decimal64](../../sql-reference/data-types/decimal.md), [Decimal128](../../sql-reference/data-types/decimal.md), [Decimal256](../../sql-reference/data-types/decimal.md),[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), [String](../../sql-reference/data-types/string.md), [Array](../../sql-reference/data-types/array.md).
ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.
[Nullable](../../sql-reference/data-types/nullable.md) is currently supported for [Flat](#flat), [Hashed](#hashed), [ComplexKeyHashed](#complex_key_hashed), [Direct](#direct), [ComplexKeyDirect](#complex_key_direct), [RangeHashed](#range_hashed), Polygon, [Cache](#cache), [ComplexKeyCache](#complex_key_cache), [SSDCache](#ssd_cache), [SSDComplexKeyCache](#complex_key_ssd_cache) dictionaries. In [IPTrie](#ip_trie) dictionaries `Nullable` types are not supported. | Yes | -| `null_value` | Default value for a non-existing element.
In the example, it is an empty string. [NULL](../syntax.md#null) value can be used only for the `Nullable` types (see the previous line with types description). | Yes | -| `expression` | [Expression](../../sql-reference/syntax.md#expressions) that ClickHouse executes on the value.
The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.

Default value: no expression. | No | -| `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](#hierarchical-dictionaries).

Default value: `false`. | No | -| `injective` | Flag that shows whether the `id -> attribute` image is [injective](https://en.wikipedia.org/wiki/Injective_function).
If `true`, ClickHouse can automatically place after the `GROUP BY` clause the requests to dictionaries with injection. Usually it significantly reduces the amount of such requests.

Default value: `false`. | No | -| `is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.

Default value: `false`. +| Tag | Description | Required | +|------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| +| `name` | Column name. 
| Yes | +| `type` | ClickHouse data type: [UInt8](../../sql-reference/data-types/int-uint.md), [UInt16](../../sql-reference/data-types/int-uint.md), [UInt32](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md), [Int8](../../sql-reference/data-types/int-uint.md), [Int16](../../sql-reference/data-types/int-uint.md), [Int32](../../sql-reference/data-types/int-uint.md), [Int64](../../sql-reference/data-types/int-uint.md), [Float32](../../sql-reference/data-types/float.md), [Float64](../../sql-reference/data-types/float.md), [UUID](../../sql-reference/data-types/uuid.md), [Decimal32](../../sql-reference/data-types/decimal.md), [Decimal64](../../sql-reference/data-types/decimal.md), [Decimal128](../../sql-reference/data-types/decimal.md), [Decimal256](../../sql-reference/data-types/decimal.md),[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), [String](../../sql-reference/data-types/string.md), [Array](../../sql-reference/data-types/array.md).
ClickHouse tries to cast the value from the dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.
[Nullable](../../sql-reference/data-types/nullable.md) is currently supported for [Flat](#flat), [Hashed](#hashed), [ComplexKeyHashed](#complex_key_hashed), [Direct](#direct), [ComplexKeyDirect](#complex_key_direct), [RangeHashed](#range_hashed), Polygon, [Cache](#cache), [ComplexKeyCache](#complex_key_cache), [SSDCache](#ssd_cache), [SSDComplexKeyCache](#complex_key_ssd_cache) dictionaries. In [IPTrie](#ip_trie) dictionaries `Nullable` types are not supported. | Yes | +| `null_value` | Default value for a non-existing element.
In the example, it is an empty string. A [NULL](../syntax.md#null) value can be used only for `Nullable` types (see the previous row with the type descriptions). | Yes | +| `expression` | [Expression](../../sql-reference/syntax.md#expressions) that ClickHouse executes on the value.
The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.

Default value: no expression. | No | +| `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](#hierarchical-dictionaries).

Default value: `false`. | No | +| `injective` | Flag that shows whether the `id -> attribute` image is [injective](https://en.wikipedia.org/wiki/Injective_function).
If `true`, ClickHouse can automatically move requests to dictionaries with injection so that they are placed after the `GROUP BY` clause. This usually reduces the number of such requests significantly.

Default value: `false`. | No | +| `is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.

Default value: `false`. -## Hierarchical Dictionaries {#hierarchical-dictionaries} +## Hierarchical Dictionaries ClickHouse supports hierarchical dictionaries with a [numeric key](#numeric-key). @@ -2165,7 +2165,7 @@ Points can be specified as an array or a tuple of their coordinates. In the curr The user can upload their own data in all formats supported by ClickHouse. -There are 3 types of [in-memory storage](#storig-dictionaries-in-memory) available: +There are 3 types of [in-memory storage](#storing-dictionaries-in-memory) available: - `POLYGON_SIMPLE`. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes. @@ -2435,7 +2435,7 @@ LIFETIME(0) LAYOUT(regexp_tree); ``` -## Embedded Dictionaries {#embedded-dictionaries} +## Embedded Dictionaries diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 7b52fbff714..d87ca4a0fe7 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1261,7 +1261,7 @@ SELECT arraySort((x) -> -x, [1, 2, 3]) as res; └─────────┘ ``` -For each element of the source array, the lambda function returns the sorting key, that is, \[1 –\> -1, 2 –\> -2, 3 –\> -3\]. Since the `arraySort` function sorts the keys in ascending order, the result is \[3, 2, 1\]. Thus, the `(x) –> -x` lambda function sets the [descending order](#reverse-sort) in a sorting. +For each element of the source array, the lambda function returns the sorting key, that is, \[1 –\> -1, 2 –\> -2, 3 –\> -3\]. Since the `arraySort` function sorts the keys in ascending order, the result is \[3, 2, 1\]. Thus, the `(x) –> -x` lambda function sets the [descending order](#arrayreversesort) in a sorting. The lambda function can accept multiple arguments. 
In this case, you need to pass the `arraySort` function several arrays of identical length that the arguments of lambda function will correspond to. The resulting array will consist of elements from the first input array; elements from the next input array(s) specify the sorting keys. For example: @@ -1307,10 +1307,15 @@ To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia. Same as `arraySort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in ascending order. Remaining elements `(limit..N]` shall contain elements in unspecified order. -## arrayReverseSort(\[func,\] arr, ...) {#reverse-sort} +## arrayReverseSort Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description. +**Syntax** + +```sql +arrayReverseSort([func,] arr, ...) +``` Example of integer values sorting: ``` sql @@ -1907,10 +1912,16 @@ FROM numbers(1,10); - [arrayReduce](#arrayreduce) -## arrayReverse(arr) +## arrayReverse Returns an array of the same size as the original array containing the elements in reverse order. +**Syntax** + +```sql +arrayReverse(arr) +``` + Example: ``` sql diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index a48893b93bf..5ab7e07fcad 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -173,7 +173,7 @@ See function [substring](string-functions.md#substring). 
## bitTest -Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. The countdown starts from 0 from the right to the left. +Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. Counting is right-to-left, starting at 0. **Syntax** @@ -226,7 +226,7 @@ Result: ## bitTestAll -Returns result of [logical conjuction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. The countdown starts from 0 from the right to the left. +Returns result of [logical conjuction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. Counting is right-to-left, starting at 0. The conjuction for bit-wise operations: @@ -289,7 +289,7 @@ Result: ## bitTestAny -Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. The countdown starts from 0 from the right to the left. +Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. Counting is right-to-left, starting at 0. The disjunction for bit-wise operations: diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index a5c8a663b71..d30c0f4dde4 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -74,7 +74,7 @@ bitmapSubsetInRange(bitmap, range_start, range_end) **Arguments** -- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). +- `bitmap` – [Bitmap object](#bitmapbuild). - `range_start` – Start of the range (inclusive). [UInt32](../data-types/int-uint.md). - `range_end` – End of the range (exclusive). [UInt32](../data-types/int-uint.md). 
@@ -104,7 +104,7 @@ bitmapSubsetLimit(bitmap, range_start, cardinality_limit) **Arguments** -- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). +- `bitmap` – [Bitmap object](#bitmapbuild). - `range_start` – Start of the range (inclusive). [UInt32](../data-types/int-uint.md). - `cardinality_limit` – Maximum cardinality of the subset. [UInt32](../data-types/int-uint.md). @@ -134,7 +134,7 @@ subBitmap(bitmap, offset, cardinality_limit) **Arguments** -- `bitmap` – The bitmap. [Bitmap object](#bitmap_functions-bitmapbuild). +- `bitmap` – The bitmap. [Bitmap object](#bitmapbuild). - `offset` – The position of the first element of the subset. [UInt32](../data-types/int-uint.md). - `cardinality_limit` – The maximum number of elements in the subset. [UInt32](../data-types/int-uint.md). @@ -162,7 +162,7 @@ bitmapContains(bitmap, needle) **Arguments** -- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). +- `bitmap` – [Bitmap object](#bitmapbuild). - `needle` – Searched bit value. [UInt32](../data-types/int-uint.md). **Returned values** @@ -188,7 +188,7 @@ Result: Checks whether two bitmaps intersect. -If `bitmap2` contains exactly one element, consider using [bitmapContains](#bitmap_functions-bitmapcontains) instead as it works more efficiently. +If `bitmap2` contains exactly one element, consider using [bitmapContains](#bitmapcontains) instead as it works more efficiently. **Syntax** diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 4092c83954a..b532e0de8f0 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -83,7 +83,7 @@ Result: ``` ## makeDate32 -Like [makeDate](#makeDate) but produces a [Date32](../data-types/date32.md). +Like [makeDate](#makedate) but produces a [Date32](../data-types/date32.md). 
## makeDateTime @@ -214,7 +214,7 @@ Result: **See also** -- [serverTimeZone](#serverTimeZone) +- [serverTimeZone](#servertimezone) ## serverTimeZone @@ -249,7 +249,7 @@ Result: **See also** -- [timeZone](#timeZone) +- [timeZone](#timezone) ## toTimeZone @@ -305,7 +305,7 @@ int32samoa: 1546300800 **See Also** -- [formatDateTime](#formatDateTime) - supports non-constant timezone. +- [formatDateTime](#formatdatetime) - supports non-constant timezone. - [toString](type-conversion-functions.md#tostring) - supports non-constant timezone. ## timeZoneOf @@ -1006,7 +1006,7 @@ toStartOfWeek(t[, mode[, timezone]]) **Arguments** - `t` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `mode` - determines the first day of the week as described in the [toWeek()](date-time-functions#toweek) function +- `mode` - determines the first day of the week as described in the [toWeek()](#toweek) function - `timezone` - Optional parameter, it behaves like any other conversion function **Returned value** @@ -1049,7 +1049,7 @@ toLastDayOfWeek(t[, mode[, timezone]]) **Arguments** - `t` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `mode` - determines the last day of the week as described in the [toWeek()](date-time-functions#toweek) function +- `mode` - determines the last day of the week as described in the [toWeek](#toweek) function - `timezone` - Optional parameter, it behaves like any other conversion function **Returned value** @@ -1719,7 +1719,7 @@ Result: **See Also** -- [fromDaysSinceYearZero](#fromDaysSinceYearZero) +- [fromDaysSinceYearZero](#fromdayssinceyearzero) ## fromDaysSinceYearZero @@ -1759,11 +1759,11 @@ Result: **See Also** -- [toDaysSinceYearZero](#toDaysSinceYearZero) +- [toDaysSinceYearZero](#todayssinceyearzero) ## fromDaysSinceYearZero32 -Like 
[fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](../data-types/date32.md). +Like [fromDaysSinceYearZero](#fromdayssinceyearzero) but returns a [Date32](../data-types/date32.md). ## age @@ -1982,7 +1982,7 @@ Result: **See Also** -- [toStartOfInterval](#tostartofintervaldate_or_date_with_time-interval-x-unit--time_zone) +- [toStartOfInterval](#tostartofinterval) ## date\_add @@ -2055,7 +2055,7 @@ Result: **See Also** -- [addDate](#addDate) +- [addDate](#adddate) ## date\_sub @@ -2129,7 +2129,7 @@ Result: **See Also** -- [subDate](#subDate) +- [subDate](#subdate) ## timestamp\_add @@ -2310,7 +2310,7 @@ Alias: `SUBDATE` - [date_sub](#date_sub) -## now {#now} +## now Returns the current date and time at the moment of query analysis. The function is a constant expression. @@ -3609,7 +3609,7 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64 └───────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` -## formatDateTime {#formatDateTime} +## formatDateTime Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column. @@ -3734,10 +3734,9 @@ LIMIT 10 **See Also** -- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax) +- [formatDateTimeInJodaSyntax](#formatdatetimeinjodasyntax) - -## formatDateTimeInJodaSyntax {#formatDateTimeInJodaSyntax} +## formatDateTimeInJodaSyntax Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. 
@@ -3902,11 +3901,11 @@ Result: **See Also** -- [fromUnixTimestampInJodaSyntax](##fromUnixTimestampInJodaSyntax) +- [fromUnixTimestampInJodaSyntax](#fromunixtimestampinjodasyntax) ## fromUnixTimestampInJodaSyntax -Same as [fromUnixTimestamp](#fromUnixTimestamp) but when called in the second way (two or three arguments), the formatting is performed using [Joda style](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL style. +Same as [fromUnixTimestamp](#fromunixtimestamp) but when called in the second way (two or three arguments), the formatting is performed using [Joda style](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL style. **Example:** @@ -4121,7 +4120,7 @@ Result: Returns the current date and time at the moment of query analysis. The function is a constant expression. :::note -This function gives the same result that `now('UTC')` would. It was added only for MySQL support and [`now`](#now-now) is the preferred usage. +This function gives the same result that `now('UTC')` would. It was added only for MySQL support and [`now`](#now) is the preferred usage. ::: **Syntax** diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 82c21ce40c8..093ee690d47 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -12,7 +12,7 @@ For dictionaries created with [DDL queries](../../sql-reference/statements/creat For information on connecting and configuring dictionaries, see [Dictionaries](../../sql-reference/dictionaries/index.md). -## dictGet, dictGetOrDefault, dictGetOrNull {#dictGet} +## dictGet, dictGetOrDefault, dictGetOrNull Retrieves values from a dictionary. 
diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index 8abc8006e5d..b6ac7a74092 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -4,6 +4,8 @@ sidebar_label: Geohash title: "Functions for Working with Geohash" --- +## Geohash + [Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides Earth’s surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer is the geohash string, the more precise is the geographic location. If you need to manually convert geographic coordinates to geohash strings, you can use [geohash.org](http://geohash.org/). diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index bcdd457964a..5fbc2adf2fa 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -4,6 +4,8 @@ sidebar_label: H3 Indexes title: "Functions for Working with H3 Indexes" --- +## H3 Index + [H3](https://eng.uber.com/h3/) is a geographical indexing system where Earth’s surface divided into a grid of even hexagonal cells. This system is hierarchical, i. e. each hexagon on the top level ("parent") can be split into seven even but smaller ones ("children"), and so on. The level of the hierarchy is called `resolution` and can receive a value from `0` till `15`, where `0` is the `base` level with the largest and coarsest cells. @@ -16,7 +18,7 @@ The full description of the H3 system is available at [the Uber Engineering site ## h3IsValid -Verifies whether the number is a valid [H3](#h3index) index. +Verifies whether the number is a valid [H3](#h3-index) index. **Syntax** @@ -51,7 +53,7 @@ Result: ## h3GetResolution -Defines the resolution of the given [H3](#h3index) index. +Defines the resolution of the given [H3](#h3-index) index. 
**Syntax** @@ -86,7 +88,7 @@ Result: ## h3EdgeAngle -Calculates the average length of the [H3](#h3index) hexagon edge in grades. +Calculates the average length of the [H3](#h3-index) hexagon edge in grades. **Syntax** @@ -100,7 +102,7 @@ h3EdgeAngle(resolution) **Returned values** -- The average length of the [H3](#h3index) hexagon edge in grades. [Float64](../../data-types/float.md). +- The average length of the [H3](#h3-index) hexagon edge in grades. [Float64](../../data-types/float.md). **Example** @@ -120,7 +122,7 @@ Result: ## h3EdgeLengthM -Calculates the average length of the [H3](#h3index) hexagon edge in meters. +Calculates the average length of the [H3](#h3-index) hexagon edge in meters. **Syntax** @@ -134,7 +136,7 @@ h3EdgeLengthM(resolution) **Returned values** -- The average length of the [H3](#h3index) hexagon edge in meters. [Float64](../../data-types/float.md). +- The average length of the [H3](#h3-index) hexagon edge in meters. [Float64](../../data-types/float.md). **Example** @@ -154,7 +156,7 @@ Result: ## h3EdgeLengthKm -Calculates the average length of the [H3](#h3index) hexagon edge in kilometers. +Calculates the average length of the [H3](#h3-index) hexagon edge in kilometers. **Syntax** @@ -168,7 +170,7 @@ h3EdgeLengthKm(resolution) **Returned values** -- The average length of the [H3](#h3index) hexagon edge in kilometers. [Float64](../../data-types/float.md). +- The average length of the [H3](#h3-index) hexagon edge in kilometers. [Float64](../../data-types/float.md). **Example** @@ -188,7 +190,7 @@ Result: ## geoToH3 -Returns [H3](#h3index) point index `(lon, lat)` with specified resolution. +Returns [H3](#h3-index) point index `(lon, lat)` with specified resolution. **Syntax** @@ -225,7 +227,7 @@ Result: ## h3ToGeo -Returns the centroid longitude and latitude corresponding to the provided [H3](#h3index) index. +Returns the centroid longitude and latitude corresponding to the provided [H3](#h3-index) index. 
**Syntax** @@ -294,7 +296,7 @@ Result: ## h3kRing - Lists all the [H3](#h3index) hexagons in the raduis of `k` from the given hexagon in random order. + Lists all the [H3](#h3-index) hexagons in the radius of `k` from the given hexagon in random order. **Syntax** @@ -335,7 +337,7 @@ Result: ## h3GetBaseCell -Returns the base cell number of the [H3](#h3index) index. +Returns the base cell number of the [H3](#h3-index) index. **Syntax** @@ -437,7 +439,7 @@ Result: ## h3IndexesAreNeighbors -Returns whether or not the provided [H3](#h3index) indexes are neighbors. +Returns whether or not the provided [H3](#h3-index) indexes are neighbors. **Syntax** @@ -473,7 +475,7 @@ Result: ## h3ToChildren -Returns an array of child indexes for the given [H3](#h3index) index. +Returns an array of child indexes for the given [H3](#h3-index) index. **Syntax** @@ -508,7 +510,7 @@ Result: ## h3ToParent -Returns the parent (coarser) index containing the given [H3](#h3index) index. +Returns the parent (coarser) index containing the given [H3](#h3-index) index. **Syntax** @@ -609,7 +611,7 @@ Result: ## h3GetResolution -Returns the resolution of the [H3](#h3index) index. +Returns the resolution of the [H3](#h3-index) index. **Syntax** @@ -643,7 +645,7 @@ Result: ## h3IsResClassIII -Returns whether [H3](#h3index) index has a resolution with Class III orientation. +Returns whether [H3](#h3-index) index has a resolution with Class III orientation. **Syntax** @@ -678,7 +680,7 @@ Result: ## h3IsPentagon -Returns whether this [H3](#h3index) index represents a pentagonal cell. +Returns whether this [H3](#h3-index) index represents a pentagonal cell. **Syntax** @@ -713,7 +715,7 @@ Result: ## h3GetFaces -Returns icosahedron faces intersected by a given [H3](#h3index) index. +Returns icosahedron faces intersected by a given [H3](#h3-index) index. 
**Syntax** @@ -815,7 +817,7 @@ Result: ## h3ToCenterChild -Returns the center child (finer) [H3](#h3index) index contained by given [H3](#h3index) at the given resolution. +Returns the center child (finer) [H3](#h3-index) index contained by given [H3](#h3-index) at the given resolution. **Syntax** @@ -830,7 +832,7 @@ h3ToCenterChild(index, resolution) **Returned values** -- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. [UInt64](../../data-types/int-uint.md). +- [H3](#h3-index) index of the center child contained by given [H3](#h3-index) at the given resolution. [UInt64](../../data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/geo/polygon.md b/docs/en/sql-reference/functions/geo/polygon.md index c2572779ada..25a7a1fac8e 100644 --- a/docs/en/sql-reference/functions/geo/polygon.md +++ b/docs/en/sql-reference/functions/geo/polygon.md @@ -142,6 +142,34 @@ SELECT readWKTPoint('POINT (1.2 3.4)'); (1.2,3.4) ``` +## readWKTLineString + +Parses a Well-Known Text (WKT) representation of a LineString geometry and returns it in the internal ClickHouse format. + +### Syntax + +```sql +readWKTLineString(wkt_string) +``` + +### Arguments + +- `wkt_string`: The input WKT string representing a LineString geometry. + +### Returned value + +The function returns a ClickHouse internal representation of the linestring geometry. + +### Example + +```sql +SELECT readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)'); +``` + +```response +[(1,1),(2,2),(3,3),(1,1)] +``` + ## readWKTRing Parses a Well-Known Text (WKT) representation of a Polygon geometry and returns a ring (closed linestring) in the internal ClickHouse format. 
@@ -163,7 +191,7 @@ The function returns a ClickHouse internal representation of the ring (closed li ### Example ```sql -SELECT readWKTRing('LINESTRING (1 1, 2 2, 3 3, 1 1)'); +SELECT readWKTRing('POLYGON ((1 1, 2 2, 3 3, 1 1))'); ``` ```response diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md index 3165b21318b..e022ce870b0 100644 --- a/docs/en/sql-reference/functions/geo/s2.md +++ b/docs/en/sql-reference/functions/geo/s2.md @@ -5,6 +5,8 @@ sidebar_label: S2 Geometry # Functions for Working with S2 Index +## S2Index + [S2](https://s2geometry.io/) is a geographical indexing system where all geographical data is represented on a three-dimensional sphere (similar to a globe). In the S2 library points are represented as the S2 Index - a specific number which encodes internally a point on the surface of a unit sphere, unlike traditional (latitude, longitude) pairs. To get the S2 point index for a given point specified in the format (latitude, longitude) use the [geoToS2](#geotos2) function. Also, you can use the [s2ToGeo](#s2togeo) function for getting geographical coordinates corresponding to the specified S2 point index. diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 506114038f7..e431ed75465 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -45,13 +45,13 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00') Calculates the MD4 from a string and returns the resulting set of bytes as FixedString(16). -## MD5 {#md5} +## MD5 Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16). If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead. If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))). 
-## sipHash64 {#siphash64} +## sipHash64 Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index 5b6a3aef2c8..11a7749b33d 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -295,7 +295,7 @@ Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null ## toIPv6 Converts a string form of IPv6 address to [IPv6](../data-types/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. -Similar to [IPv6StringToNum](#ipv6stringtonums) function, which converts IPv6 address to binary format. +Similar to [IPv6StringToNum](#ipv6stringtonum) function, which converts IPv6 address to binary format. If the input string contains a valid IPv4 address, then the IPv6 equivalent of the IPv4 address is returned. diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 5d73c9a83b3..7bff6a6cba5 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -5,10 +5,10 @@ sidebar_label: JSON --- There are two sets of functions to parse JSON: - - [`simpleJSON*` (`visitParam*`)](#simplejson--visitparam-functions) which is made for parsing a limited subset of JSON extremely fast. + - [`simpleJSON*` (`visitParam*`)](#simplejson-visitparam-functions) which is made for parsing a limited subset of JSON extremely fast. - [`JSONExtract*`](#jsonextract-functions) which is made for parsing ordinary JSON. -## simpleJSON / visitParam functions +## simpleJSON (visitParam) functions ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be. 
They try to do as little as possible to get the job done as quickly as possible. diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 12098efc635..b9b5c6d7a05 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -8,7 +8,7 @@ sidebar_label: Mathematical ## e -Returns e ([Euler's constant](https://en.wikipedia.org/wiki/Euler%27s_constant)). +Returns $e$ ([Euler's constant](https://en.wikipedia.org/wiki/Euler%27s_constant)). **Syntax** @@ -22,7 +22,7 @@ Type: [Float64](../data-types/float.md). ## pi -Returns π ([Pi](https://en.wikipedia.org/wiki/Pi)). +Returns $\pi$ ([Pi](https://en.wikipedia.org/wiki/Pi)). **Syntax** @@ -35,7 +35,7 @@ Type: [Float64](../data-types/float.md). ## exp -Returns e to the power of the given argument. +Returns $e^{x}$, where x is the given argument to the function. **Syntax** @@ -47,6 +47,22 @@ exp(x) - `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +**Example** + +Query: + +```sql +SELECT round(exp(-1), 4); +``` + +Result: + +```response +┌─round(exp(-1), 4)─┐ +│ 0.3679 │ +└───────────────────┘ +``` + **Returned value** Type: [Float*](../data-types/float.md). @@ -91,7 +107,7 @@ Type: [Float*](../data-types/float.md). ## intExp2 -Like `exp` but returns a UInt64. +Like [`exp2`](#exp2) but returns a UInt64. **Syntax** @@ -137,7 +153,7 @@ Type: [Float*](../data-types/float.md). ## intExp10 -Like `exp10` but returns a UInt64. +Like [`exp10`](#exp10) but returns a UInt64. **Syntax** @@ -197,7 +213,7 @@ Type: [Float*](../data-types/float.md). ## erf -If `x` is non-negative, then `erf(x / σ√2)` is the probability that a random variable having a normal distribution with standard deviation `σ` takes the value that is separated from the expected value by more than `x`. 
+If `x` is non-negative, then $erf(\frac{x}{\sigma\sqrt{2}})$ is the probability that a random variable having a normal distribution with standard deviation $\sigma$ takes a value that is separated from the expected value by less than `x`. **Syntax** @@ -229,7 +245,7 @@ SELECT erf(3 / sqrt(2)); ## erfc -Returns a number close to `1 - erf(x)` without loss of precision for large ‘x’ values. +Returns a number close to $1-erf(x)$ without loss of precision for large `x` values. **Syntax** @@ -403,7 +419,7 @@ Type: [Float*](../data-types/float.md). ## pow -Returns `x` to the power of `y`. +Returns $x^y$. **Syntax** @@ -434,11 +450,11 @@ cosh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: $-\infty \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Values from the interval: `1 <= cosh(x) < +∞`. +- Values from the interval: $1 \le cosh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -468,11 +484,11 @@ acosh(x) **Arguments** -- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — Hyperbolic cosine of angle. Values from the interval: $1 \le x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- The angle, in radians. Values from the interval: `0 <= acosh(x) < +∞`. +- The angle, in radians. Values from the interval: $0 \le acosh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -502,11 +518,11 @@ sinh(x) **Arguments** -- `x` — The angle, in radians. 
Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: $-\infty \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Values from the interval: `-∞ < sinh(x) < +∞`. +- Values from the interval: $-\infty \lt sinh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -536,11 +552,11 @@ asinh(x) **Arguments** -- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — Hyperbolic sine of angle. Values from the interval: $-\infty \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- The angle, in radians. Values from the interval: `-∞ < asinh(x) < +∞`. +- The angle, in radians. Values from the interval: $-\infty \lt asinh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -569,11 +585,11 @@ tanh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: $-\infty \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Values from the interval: `-1 < tanh(x) < 1`. +- Values from the interval: $-1 \lt tanh(x) \lt 1$. Type: [Float*](../data-types/float.md#float32-float64). @@ -601,11 +617,11 @@ atanh(x) **Arguments** -- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. 
[(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — Hyperbolic tangent of angle. Values from the interval: $-1 \lt x \lt 1$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- The angle, in radians. Values from the interval: `-∞ < atanh(x) < +∞`. +- The angle, in radians. Values from the interval: $-\infty \lt atanh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -640,7 +656,7 @@ atan2(y, x) **Returned value** -- The angle `θ` such that `−π < θ ≤ π`, in radians. +- The angle `θ` such that $-\pi \lt \theta \le \pi$, in radians. Type: [Float64](../data-types/float.md#float32-float64). @@ -705,11 +721,11 @@ log1p(x) **Arguments** -- `x` — Values from the interval: `-1 < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — Values from the interval: $-1 \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Values from the interval: `-∞ < log1p(x) < +∞`. +- Values from the interval: $-\infty \lt log1p(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -739,7 +755,7 @@ sign(x) **Arguments** -- `x` — Values from `-∞` to `+∞`. Support all numeric types in ClickHouse. +- `x` — Values from $-\infty$ to $+\infty$. Supports all numeric types in ClickHouse. **Returned value** @@ -804,7 +820,7 @@ sigmoid(x) **Parameters** -- `x` — input value. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — input value. Values from the interval: $-\infty \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). 
**Returned value** diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md index 4bfa181a35f..7057ebebfe4 100644 --- a/docs/en/sql-reference/functions/nlp-functions.md +++ b/docs/en/sql-reference/functions/nlp-functions.md @@ -6,26 +6,297 @@ sidebar_label: NLP (experimental) # Natural Language Processing (NLP) Functions -:::note +:::warning This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it. ::: +## detectCharset + +The `detectCharset` function detects the character set of the non-UTF8-encoded input string. + +*Syntax* + +``` sql +detectCharset('text_to_be_analyzed') +``` + +*Arguments* + +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- A `String` containing the code of the detected character set + +*Examples* + +Query: + +```sql +SELECT detectCharset('Ich bleibe für ein paar Tage.'); +``` + +Result: + +```response +┌─detectCharset('Ich bleibe für ein paar Tage.')─┐ +│ WINDOWS-1252 │ +└────────────────────────────────────────────────┘ +``` + +## detectLanguage + +Detects the language of the UTF8-encoded input string. The function uses the [CLD2 library](https://github.com/CLD2Owners/cld2) for detection, and it returns the 2-letter ISO language code. + +The `detectLanguage` function works best when providing over 200 characters in the input string. + +*Syntax* + +``` sql +detectLanguage('text_to_be_analyzed') +``` + +*Arguments* + +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- The 2-letter ISO code of the detected language + +Other possible results: + +- `un` = unknown, can not detect any language. 
+- `other` = the detected language does not have 2 letter code. + +*Examples* + +Query: + +```sql +SELECT detectLanguage('Je pense que je ne parviendrai jamais à parler français comme un natif. Where there’s a will, there’s a way.'); +``` + +Result: + +```response +fr +``` + +## detectLanguageMixed + +Similar to the `detectLanguage` function, but `detectLanguageMixed` returns a `Map` of 2-letter language codes that are mapped to the percentage of that language in the text. + + +*Syntax* + +``` sql +detectLanguageMixed('text_to_be_analyzed') +``` + +*Arguments* + +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language + + +*Examples* + +Query: + +```sql +SELECT detectLanguageMixed('二兎を追う者は一兎をも得ず二兎を追う者は一兎をも得ず A vaincre sans peril, on triomphe sans gloire.'); +``` + +Result: + +```response +┌─detectLanguageMixed()─┐ +│ {'ja':0.62,'fr':0.36} │ +└───────────────────────┘ +``` + +## detectProgrammingLanguage + +Determines the programming language from the source code. Calculates all the unigrams and bigrams of commands in the source code. +Then using a marked-up dictionary with weights of unigrams and bigrams of commands for various programming languages finds the biggest weight of the programming language and returns it. + +*Syntax* + +``` sql +detectProgrammingLanguage('source_code') +``` + +*Arguments* + +- `source_code` — String representation of the source code to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- Programming language. [String](../data-types/string.md). 
+ +*Examples* + +Query: + +```sql +SELECT detectProgrammingLanguage('#include <iostream>'); +``` + +Result: + +```response +┌─detectProgrammingLanguage('#include <iostream>')─┐ +│ C++ │ +└──────────────────────────────────────────────────┘ +``` + +## detectLanguageUnknown + +Similar to the `detectLanguage` function, except the `detectLanguageUnknown` function works with non-UTF8-encoded strings. Prefer this version when your character set is UTF-16 or UTF-32. + + +*Syntax* + +``` sql +detectLanguageUnknown('text_to_be_analyzed') +``` + +*Arguments* + +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- The 2-letter ISO code of the detected language + +Other possible results: + +- `un` = unknown, can not detect any language. +- `other` = the detected language does not have 2 letter code. + +*Examples* + +Query: + +```sql +SELECT detectLanguageUnknown('Ich bleibe für ein paar Tage.'); +``` + +Result: + +```response +┌─detectLanguageUnknown('Ich bleibe für ein paar Tage.')─┐ +│ de │ +└────────────────────────────────────────────────────────┘ +``` + +## detectTonality + +Determines the sentiment of text data. Uses a marked-up sentiment dictionary, in which each word has a tonality ranging from `-12` to `6`. +For each text, it calculates the average sentiment value of its words and returns it in the range `[-1,1]`. + +:::note +This function is limited in its current form. Currently it makes use of the embedded emotional dictionary at `/contrib/nlp-data/tonality_ru.zst` and only works for the Russian language. +::: + +*Syntax* + +``` sql +detectTonality(text) +``` + +*Arguments* + +- `text` — The text to be analyzed. [String](../data-types/string.md#string). + +*Returned value* + +- The average sentiment value of the words in `text`. [Float32](../data-types/float.md). 
+ +*Examples* + +Query: + +```sql +SELECT detectTonality('Шарик - хороший пёс'), -- Sharik is a good dog + detectTonality('Шарик - пёс'), -- Sharik is a dog + detectTonality('Шарик - плохой пёс'); -- Sharik is a bad dog +``` + +Result: + +```response +┌─detectTonality('Шарик - хороший пёс')─┬─detectTonality('Шарик - пёс')─┬─detectTonality('Шарик - плохой пёс')─┐ +│ 0.44445 │ 0 │ -0.3 │ +└───────────────────────────────────────┴───────────────────────────────┴──────────────────────────────────────┘ +``` +## lemmatize + +Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models). + +*Syntax* + +``` sql +lemmatize('language', word) +``` + +*Arguments* + +- `language` — Language which rules will be applied. [String](../data-types/string.md#string). +- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../data-types/string.md#string). + +*Examples* + +Query: + +``` sql +SELECT lemmatize('en', 'wolves'); +``` + +Result: + +``` text +┌─lemmatize("wolves")─┐ +│ "wolf" │ +└─────────────────────┘ +``` + +*Configuration* + +This configuration specifies that the dictionary `en.bin` should be used for lemmatization of English (`en`) words. The `.bin` files can be downloaded from +[here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models). + +``` xml +<lemmatizers> + <lemmatizer> + <!-- highlight-start --> + <lang>en</lang> + <path>en.bin</path> + <!-- highlight-end --> + </lemmatizer> +</lemmatizers> +``` + ## stem Performs stemming on a given word. -### Syntax +*Syntax* ``` sql stem('language', word) ``` -### Arguments +*Arguments* - `language` — Language which rules will be applied. Use the two letter [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). - `word` — word that needs to be stemmed. Must be in lowercase. [String](../data-types/string.md#string). 
-### Examples +*Examples* Query: @@ -40,7 +311,7 @@ Result: │ ['I','think','it','is','a','bless','in','disguis'] │ └────────────────────────────────────────────────────┘ ``` -### Supported languages for stem() +*Supported languages for stem()* :::note The stem() function uses the [Snowball stemming](https://snowballstem.org/) library, see the Snowball website for updated languages etc. @@ -76,53 +347,6 @@ The stem() function uses the [Snowball stemming](https://snowballstem.org/) libr - Turkish - Yiddish -## lemmatize - -Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models). - -### Syntax - -``` sql -lemmatize('language', word) -``` - -### Arguments - -- `language` — Language which rules will be applied. [String](../data-types/string.md#string). -- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../data-types/string.md#string). - -### Examples - -Query: - -``` sql -SELECT lemmatize('en', 'wolves'); -``` - -Result: - -``` text -┌─lemmatize("wolves")─┐ -│ "wolf" │ -└─────────────────────┘ -``` - -### Configuration - -This configuration specifies that the dictionary `en.bin` should be used for lemmatization of English (`en`) words. The `.bin` files can be downloaded from -[here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models). - -``` xml - - - - en - en.bin - - - -``` - ## synonyms Finds synonyms to a given word. There are two types of synonym extensions: `plain` and `wordnet`. @@ -131,18 +355,18 @@ With the `plain` extension type we need to provide a path to a simple text file, With the `wordnet` extension type we need to provide a path to a directory with WordNet thesaurus in it. Thesaurus must contain a WordNet sense index. -### Syntax +*Syntax* ``` sql synonyms('extension_name', word) ``` -### Arguments +*Arguments* - `extension_name` — Name of the extension in which search will be performed. 
[String](../data-types/string.md#string). - `word` — Word that will be searched in extension. [String](../data-types/string.md#string). -### Examples +*Examples* Query: @@ -158,7 +382,7 @@ Result: └──────────────────────────────────────────┘ ``` -### Configuration +*Configuration* ``` xml @@ -172,154 +396,4 @@ Result: en/ -``` - -## detectLanguage - -Detects the language of the UTF8-encoded input string. The function uses the [CLD2 library](https://github.com/CLD2Owners/cld2) for detection, and it returns the 2-letter ISO language code. - -The `detectLanguage` function works best when providing over 200 characters in the input string. - -### Syntax - -``` sql -detectLanguage('text_to_be_analyzed') -``` - -### Arguments - -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). - -### Returned value - -- The 2-letter ISO code of the detected language - -Other possible results: - -- `un` = unknown, can not detect any language. -- `other` = the detected language does not have 2 letter code. - -### Examples - -Query: - -```sql -SELECT detectLanguage('Je pense que je ne parviendrai jamais à parler français comme un natif. Where there’s a will, there’s a way.'); -``` - -Result: - -```response -fr -``` - -## detectLanguageMixed - -Similar to the `detectLanguage` function, but `detectLanguageMixed` returns a `Map` of 2-letter language codes that are mapped to the percentage of the certain language in the text. - - -### Syntax - -``` sql -detectLanguageMixed('text_to_be_analyzed') -``` - -### Arguments - -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). 
- -### Returned value - -- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language - - -### Examples - -Query: - -```sql -SELECT detectLanguageMixed('二兎を追う者は一兎をも得ず二兎を追う者は一兎をも得ず A vaincre sans peril, on triomphe sans gloire.'); -``` - -Result: - -```response -┌─detectLanguageMixed()─┐ -│ {'ja':0.62,'fr':0.36 │ -└───────────────────────┘ -``` - -## detectLanguageUnknown - -Similar to the `detectLanguage` function, except the `detectLanguageUnknown` function works with non-UTF8-encoded strings. Prefer this version when your character set is UTF-16 or UTF-32. - - -### Syntax - -``` sql -detectLanguageUnknown('text_to_be_analyzed') -``` - -### Arguments - -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). - -### Returned value - -- The 2-letter ISO code of the detected language - -Other possible results: - -- `un` = unknown, can not detect any language. -- `other` = the detected language does not have 2 letter code. - -### Examples - -Query: - -```sql -SELECT detectLanguageUnknown('Ich bleibe für ein paar Tage.'); -``` - -Result: - -```response -┌─detectLanguageUnknown('Ich bleibe für ein paar Tage.')─┐ -│ de │ -└────────────────────────────────────────────────────────┘ -``` - -## detectCharset - -The `detectCharset` function detects the character set of the non-UTF8-encoded input string. - - -### Syntax - -``` sql -detectCharset('text_to_be_analyzed') -``` - -### Arguments - -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). 
- -### Returned value - -- A `String` containing the code of the detected character set - -### Examples - -Query: - -```sql -SELECT detectCharset('Ich bleibe für ein paar Tage.'); -``` - -Result: - -```response -┌─detectCharset('Ich bleibe für ein paar Tage.')─┐ -│ WINDOWS-1252 │ -└────────────────────────────────────────────────┘ -``` +``` \ No newline at end of file diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 31df9e5627d..4e252785715 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -212,7 +212,7 @@ toTypeName(x) ## blockSize {#blockSize} -In ClickHouse, queries are processed in blocks (chunks). +In ClickHouse, queries are processed in [blocks](../../development/architecture.md/#block-block) (chunks). This function returns the size (row count) of the block the function is called on. **Syntax** @@ -221,6 +221,33 @@ This function returns the size (row count) of the block the function is called o blockSize() ``` +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test; +CREATE TABLE test (n UInt8) ENGINE = Memory; + +INSERT INTO test +SELECT * FROM system.numbers LIMIT 5; + +SELECT blockSize() +FROM test; +``` + +Result: + +```response + ┌─blockSize()─┐ +1. │ 5 │ +2. │ 5 │ +3. │ 5 │ +4. │ 5 │ +5. │ 5 │ + └─────────────┘ +``` + ## byteSize Returns an estimation of uncompressed byte size of its arguments in memory. @@ -735,7 +762,7 @@ LIMIT 10 Given a size (number of bytes), this function returns a readable, rounded size with suffix (KB, MB, etc.) as string. -The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull). 
+The opposite operations of this function are [parseReadableSize](#parsereadablesize), [parseReadableSizeOrZero](#parsereadablesizeorzero), and [parseReadableSizeOrNull](#parsereadablesizeornull). **Syntax** @@ -768,7 +795,7 @@ Result: Given a size (number of bytes), this function returns a readable, rounded size with suffix (KiB, MiB, etc.) as string. -The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull). +The opposite operations of this function are [parseReadableSize](#parsereadablesize), [parseReadableSizeOrZero](#parsereadablesizeorzero), and [parseReadableSizeOrNull](#parsereadablesizeornull). **Syntax** @@ -899,7 +926,7 @@ SELECT Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. If the function is unable to parse the input value, it throws an exception. -The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize). +The inverse operations of this function are [formatReadableSize](#formatreadablesize) and [formatReadableDecimalSize](#formatreadabledecimalsize). **Syntax** @@ -937,7 +964,7 @@ SELECT Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. If the function is unable to parse the input value, it returns `NULL`. -The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize). 
+The inverse operations of this function are [formatReadableSize](#formatreadablesize) and [formatReadableDecimalSize](#formatreadabledecimalsize). **Syntax** @@ -975,7 +1002,7 @@ SELECT Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. If the function is unable to parse the input value, it returns `0`. -The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize). +The inverse operations of this function are [formatReadableSize](#formatreadablesize) and [formatReadableDecimalSize](#formatreadabledecimalsize). **Syntax** @@ -2684,7 +2711,7 @@ countDigits(x) - Number of digits. [UInt8](../data-types/int-uint.md#uint-ranges). :::note -For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow). +For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#isdecimaloverflow). ::: **Example** @@ -2776,7 +2803,7 @@ currentProfiles() ## enabledProfiles -Returns settings profiles, assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as returned by the [currentProfiles](#current-profiles) function. 
Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file). +Returns settings profiles, assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as returned by the [currentProfiles](#currentprofiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file). **Syntax** @@ -2889,11 +2916,11 @@ Result: └───────────────────────────┘ ``` -## queryID {#queryID} +## queryID Returns the ID of the current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `query_id`. -In contrast to [initialQueryID](#initial-query-id) function, `queryID` can return different results on different shards (see the example). +In contrast to [initialQueryID](#initialqueryid) function, `queryID` can return different results on different shards (see the example). **Syntax** @@ -2927,7 +2954,7 @@ Result: Returns the ID of the initial current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `initial_query_id`. -In contrast to [queryID](#query-id) function, `initialQueryID` returns the same results on different shards (see example). +In contrast to [queryID](#queryid) function, `initialQueryID` returns the same results on different shards (see example). **Syntax** @@ -3014,7 +3041,7 @@ shardCount() **See Also** -- [shardNum()](#shard-num) function example also contains `shardCount()` function call. 
+- [shardNum()](#shardnum) function example also contains `shardCount()` function call. ## getOSKernelVersion @@ -3688,3 +3715,283 @@ Result: ```response {'version':'1','serial_number':'2D9071D64530052D48308473922C7ADAFA85D6C5','signature_algo':'sha256WithRSAEncryption','issuer':'/CN=marsnet.local CA','not_before':'May 7 17:01:21 2024 GMT','not_after':'May 7 17:01:21 2025 GMT','subject':'/CN=chnode1','pkey_algo':'rsaEncryption'} ``` + +## lowCardinalityIndices + +Returns the position of a value in the dictionary of a [LowCardinality](../data-types/lowcardinality.md) column. Positions start at 1. Since LowCardinality have per-part dictionaries, this function may return different positions for the same value in different parts. + +**Syntax** + +```sql +lowCardinalityIndices(col) +``` + +**Arguments** + +- `col` — a low cardinality column. [LowCardinality](../data-types/lowcardinality.md). + +**Returned value** + +- The position of the value in the dictionary of the current part. [UInt64](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test; +CREATE TABLE test (s LowCardinality(String)) ENGINE = Memory; + +-- create two parts: + +INSERT INTO test VALUES ('ab'), ('cd'), ('ab'), ('ab'), ('df'); +INSERT INTO test VALUES ('ef'), ('cd'), ('ab'), ('cd'), ('ef'); + +SELECT s, lowCardinalityIndices(s) FROM test; +``` + +Result: + +```response + ┌─s──┬─lowCardinalityIndices(s)─┐ +1. │ ab │ 1 │ +2. │ cd │ 2 │ +3. │ ab │ 1 │ +4. │ ab │ 1 │ +5. │ df │ 3 │ + └────┴──────────────────────────┘ + ┌─s──┬─lowCardinalityIndices(s)─┐ + 6. │ ef │ 1 │ + 7. │ cd │ 2 │ + 8. │ ab │ 3 │ + 9. │ cd │ 2 │ +10. │ ef │ 1 │ + └────┴──────────────────────────┘ +``` +## lowCardinalityKeys + +Returns the dictionary values of a [LowCardinality](../data-types/lowcardinality.md) column. If the block is smaller or larger than the dictionary size, the result will be truncated or extended with default values. 
Since LowCardinality have per-part dictionaries, this function may return different dictionary values in different parts. + +**Syntax** + +```sql +lowCardinalityKeys(col) +``` + +**Arguments** + +- `col` — a low cardinality column. [LowCardinality](../data-types/lowcardinality.md). + +**Returned value** + +- The dictionary keys. [UInt64](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test; +CREATE TABLE test (s LowCardinality(String)) ENGINE = Memory; + +-- create two parts: + +INSERT INTO test VALUES ('ab'), ('cd'), ('ab'), ('ab'), ('df'); +INSERT INTO test VALUES ('ef'), ('cd'), ('ab'), ('cd'), ('ef'); + +SELECT s, lowCardinalityKeys(s) FROM test; +``` + +Result: + +```response + ┌─s──┬─lowCardinalityKeys(s)─┐ +1. │ ef │ │ +2. │ cd │ ef │ +3. │ ab │ cd │ +4. │ cd │ ab │ +5. │ ef │ │ + └────┴───────────────────────┘ + ┌─s──┬─lowCardinalityKeys(s)─┐ + 6. │ ab │ │ + 7. │ cd │ ab │ + 8. │ ab │ cd │ + 9. │ ab │ df │ +10. │ df │ │ + └────┴───────────────────────┘ +``` + +## displayName + +Returns the value of `display_name` from [config](../../operations/configuration-files.md/#configuration-files) or server Fully Qualified Domain Name (FQDN) if not set. + +**Syntax** + +```sql +displayName() +``` + +**Returned value** + +- Value of `display_name` from config or server FQDN if not set. [String](../data-types/string.md). + +**Example** + +The `display_name` can be set in `config.xml`. Taking for example a server with `display_name` configured to 'production': + +```xml + +<display_name>production</display_name> +``` + +Query: + +```sql +SELECT displayName(); +``` + +Result: + +```response +┌─displayName()─┐ +│ production │ +└───────────────┘ +``` + +## transactionID + +Returns the ID of a [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback). + +:::note +This function is part of an experimental feature set. 
Enable experimental transaction support by adding this setting to your configuration: + +``` + + 1 + +``` + +For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback). +::: + +**Syntax** + +```sql +transactionID() +``` + +**Returned value** + +- Returns a tuple consisting of `start_csn`, `local_tid` and `host_id`. [Tuple](../data-types/tuple.md). + +- `start_csn`: Global sequential number, the newest commit timestamp that was seen when this transaction began. [UInt64](../data-types/int-uint.md). +- `local_tid`: Local sequential number that is unique for each transaction started by this host within a specific start_csn. [UInt64](../data-types/int-uint.md). +- `host_id`: UUID of the host that has started this transaction. [UUID](../data-types/uuid.md). + +**Example** + +Query: + +```sql +BEGIN TRANSACTION; +SELECT transactionID(); +ROLLBACK; +``` + +Result: + +```response +┌─transactionID()────────────────────────────────┐ +│ (32,34,'0ee8b069-f2bb-4748-9eae-069c85b5252b') │ +└────────────────────────────────────────────────┘ +``` + +## transactionLatestSnapshot + +Returns the newest snapshot (Commit Sequence Number) of a [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback) that is available for reading. + +:::note +This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration: + +``` + + 1 + +``` + +For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback). +::: + +**Syntax** + +```sql +transactionLatestSnapshot() +``` + +**Returned value** + +- Returns the latest snapshot (CSN) of a transaction. 
[UInt64](../data-types/int-uint.md) + +**Example** + +Query: + +```sql +BEGIN TRANSACTION; +SELECT transactionLatestSnapshot(); +ROLLBACK; +``` + +Result: + +```response +┌─transactionLatestSnapshot()─┐ +│ 32 │ +└─────────────────────────────┘ +``` + +## transactionOldestSnapshot + +Returns the oldest snapshot (Commit Sequence Number) that is visible for some running [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback). + +:::note +This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration: + +``` + + 1 + +``` + +For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback). +::: + +**Syntax** + +```sql +transactionOldestSnapshot() +``` + +**Returned value** + +- Returns the oldest snapshot (CSN) of a transaction. [UInt64](../data-types/int-uint.md) + +**Example** + +Query: + +```sql +BEGIN TRANSACTION; +SELECT transactionOldestSnapshot(); +ROLLBACK; +``` + +Result: + +```response +┌─transactionOldestSnapshot()─┐ +│ 32 │ +└─────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index d18185c5013..e2f471d47eb 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -6,49 +6,90 @@ sidebar_label: Rounding # Rounding Functions -## floor(x\[, N\]) +## floor -Returns the largest round number that is less than or equal to `x`. A round number is a multiple of 1/10N, or the nearest number of the appropriate data type if 1 / 10N isn’t exact. -‘N’ is an integer constant, optional parameter. By default it is zero, which means to round to an integer. -‘N’ may be negative. +Returns the largest rounded number less than or equal to `x`. 
+A rounded number is a multiple of 1 / 10^N, or the nearest number of the appropriate data type if 1 / 10^N isn’t exact. -Examples: `floor(123.45, 1) = 123.4, floor(123.45, -1) = 120.` +Integer arguments may be rounded with negative `N` argument, with non-negative `N` the function returns `x`, i.e. does nothing. -`x` is any numeric type. The result is a number of the same type. -For integer arguments, it makes sense to round with a negative `N` value (for non-negative `N`, the function does not do anything). -If rounding causes overflow (for example, floor(-128, -1)), an implementation-specific result is returned. +If rounding causes an overflow (for example, `floor(-128, -1)`), the result is undefined. -## ceil(x\[, N\]), ceiling(x\[, N\]) +**Syntax** -Returns the smallest round number that is greater than or equal to `x`. In every other way, it is the same as the `floor` function (see above). +``` sql +floor(x[, N]) +``` -## trunc(x\[, N\]), truncate(x\[, N\]) +**Parameters** -Returns the round number with largest absolute value that has an absolute value less than or equal to `x`‘s. In every other way, it is the same as the ’floor’ function (see above). +- `x` - The value to round. [Float*](../data-types/float.md), [Decimal*](../data-types/decimal.md), or [(U)Int*](../data-types/int-uint.md). +- `N` - The number of decimal places to round to. [(U)Int*](../data-types/int-uint.md). The default is zero, which means rounding to an integer. Can be negative. + +**Returned value** + +A rounded number of the same type as `x`. + +**Examples** + +Query: + +```sql +SELECT floor(123.45, 1) AS rounded +``` + +Result: + +``` +┌─rounded─┐ +│ 123.4 │ +└─────────┘ +``` + +Query: + +```sql +SELECT floor(123.45, -1) AS rounded +``` + +Result: + +``` +┌─rounded─┐ +│ 120 │ +└─────────┘ +``` + +## ceiling + +Like `floor` but returns the smallest rounded number greater than or equal to `x`. 
+ +**Syntax** + +``` sql +ceiling(x[, N]) +``` + +Alias: `ceil`. + +## truncate + +Like `floor` but returns the rounded number with largest absolute value that has an absolute value less than or equal to `x`'s. **Syntax** ```sql -trunc(input, precision) +truncate(x[, N]) ``` -Alias: `truncate`. - -**Parameters** - -- `input`: A numeric type ([Float](../data-types/float.md), [Decimal](../data-types/decimal.md) or [Integer](../data-types/int-uint.md)). -- `precision`: An [Integer](../data-types/int-uint.md) type. - -**Returned value** - -- A data type of `input`. +Alias: `trunc`. **Example** Query: ```sql -SELECT trunc(123.499, 1) as res; +SELECT truncate(123.499, 1) as res; ``` ```response @@ -57,37 +98,40 @@ SELECT trunc(123.499, 1) as res; └───────┘ ``` -## round(x\[, N\]) +## round Rounds a value to a specified number of decimal places. -The function returns the nearest number of the specified order. In case when given number has equal distance to surrounding numbers, the function uses banker’s rounding for float number types and rounds away from zero for the other number types (Decimal). +The function returns the nearest number of the specified order. +If the input value has equal distance to two neighboring numbers, the function uses banker’s rounding for [Float*](../data-types/float.md) inputs and rounds away from zero for the other number types ([Decimal*](../data-types/decimal.md)). + +**Syntax** ``` sql -round(expression [, decimal_places]) +round(x[, N]) ``` **Arguments** -- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../data-types/index.md#data_types). -- `decimal-places` — An integer value. - - If `decimal-places > 0` then the function rounds the value to the right of the decimal point. - - If `decimal-places < 0` then the function rounds the value to the left of the decimal point. - - If `decimal-places = 0` then the function rounds the value to integer. 
In this case the argument can be omitted. +- `x` — A number to round. [Float*](../data-types/float.md), [Decimal*](../data-types/decimal.md), or [(U)Int*](../data-types/int-uint.md). +- `N` — The number of decimal places to round to. Integer. Defaults to `0`. + - If `N > 0`, the function rounds to the right of the decimal point. + - If `N < 0`, the function rounds to the left of the decimal point. + - If `N = 0`, the function rounds to the next integer. **Returned value:** -The rounded number of the same type as the input number. +A rounded number of the same type as `x`. **Examples** -Example of usage with Float: +Example with `Float` inputs: -``` sql +```sql SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3; ``` -``` text +``` ┌───x─┬─round(divide(number, 2))─┐ │ 0 │ 0 │ │ 0.5 │ 0 │ @@ -95,13 +139,13 @@ SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3; └─────┴──────────────────────────┘ ``` -Example of usage with Decimal: +Example with `Decimal` inputs: -``` sql +```sql SELECT cast(number / 2 AS Decimal(10,4)) AS x, round(x) FROM system.numbers LIMIT 3; ``` -``` text +``` ┌───x─┬─round(CAST(divide(number, 2), 'Decimal(10, 4)'))─┐ │ 0 │ 0 │ │ 0.5 │ 1 │ @@ -109,14 +153,14 @@ SELECT cast(number / 2 AS Decimal(10,4)) AS x, round(x) FROM system.numbers LIM └─────┴──────────────────────────────────────────────────┘ ``` -If you want to keep the trailing zeros, you need to enable `output_format_decimal_trailing_zeros` +To retain trailing zeros, enable setting `output_format_decimal_trailing_zeros`: -``` sql +```sql SELECT cast(number / 2 AS Decimal(10,4)) AS x, round(x) FROM system.numbers LIMIT 3 settings output_format_decimal_trailing_zeros=1; ``` -``` text +``` ┌──────x─┬─round(CAST(divide(number, 2), 'Decimal(10, 4)'))─┐ │ 0.0000 │ 0.0000 │ │ 0.5000 │ 1.0000 │ @@ -151,9 +195,15 @@ round(3.65, 1) = 3.6 Rounds a number to a specified decimal position. -- If the rounding number is halfway between two numbers, the function uses banker’s rounding. 
Banker's rounding is a method of rounding fractional numbers. When the rounding number is halfway between two numbers, it's rounded to the nearest even digit at the specified decimal position. For example: 3.5 rounds up to 4, 2.5 rounds down to 2. It's the default rounding method for floating point numbers defined in [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest). The [round](#rounding_functions-round) function performs the same rounding for floating point numbers. The `roundBankers` function also rounds integers the same way, for example, `roundBankers(45, -1) = 40`. +If the rounding number is halfway between two numbers, the function uses banker’s rounding. +Banker's rounding is a method of rounding fractional numbers. +When the rounding number is halfway between two numbers, it's rounded to the nearest even digit at the specified decimal position. +For example: 3.5 rounds up to 4, 2.5 rounds down to 2. +It's the default rounding method for floating point numbers defined in [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest). +The [round](#round) function performs the same rounding for floating point numbers. +The `roundBankers` function also rounds integers the same way, for example, `roundBankers(45, -1) = 40`. -- In other cases, the function rounds numbers to the nearest integer. +In other cases, the function rounds numbers to the nearest integer. Using banker’s rounding, you can reduce the effect that rounding numbers has on the results of summing or subtracting these numbers. @@ -166,16 +216,20 @@ For example, sum numbers 1.5, 2.5, 3.5, 4.5 with different rounding: **Syntax** ``` sql -roundBankers(expression [, decimal_places]) +roundBankers(x[, N]) ``` **Arguments** -- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../data-types/index.md#data_types). -- `decimal-places` — Decimal places. An integer number. 
- - `decimal-places > 0` — The function rounds the number to the given position right of the decimal point. Example: `roundBankers(3.55, 1) = 3.6`. - - `decimal-places < 0` — The function rounds the number to the given position left of the decimal point. Example: `roundBankers(24.55, -1) = 20`. - - `decimal-places = 0` — The function rounds the number to an integer. In this case the argument can be omitted. Example: `roundBankers(2.5) = 2`. + - `N > 0` — The function rounds the number to the given position right of the decimal point. Example: `roundBankers(3.55, 1) = 3.6`. + - `N < 0` — The function rounds the number to the given position left of the decimal point. Example: `roundBankers(24.55, -1) = 20`. + - `N = 0` — The function rounds the number to an integer. In this case the argument can be omitted. Example: `roundBankers(2.5) = 2`. + +- `x` — A number to round. [Float*](../data-types/float.md), [Decimal*](../data-types/decimal.md), or [(U)Int*](../data-types/int-uint.md). +- `N` — The number of decimal places to round to. Integer. Defaults to `0`. + - If `N > 0`, the function rounds to the right of the decimal point. + - If `N < 0`, the function rounds to the left of the decimal point. + - If `N = 0`, the function rounds to the next integer. **Returned value** @@ -185,13 +239,13 @@ A value rounded by the banker’s rounding method. Query: -``` sql +```sql SELECT number / 2 AS x, roundBankers(x, 0) AS b fROM system.numbers limit 10 ``` Result: -``` text +``` ┌───x─┬─b─┐ │ 0 │ 0 │ │ 0.5 │ 0 │ @@ -208,7 +262,7 @@ Result: Examples of Banker’s rounding: -``` text +``` roundBankers(0.4) = 0 roundBankers(-3.5) = -4 roundBankers(4.5) = 4 @@ -220,7 +274,7 @@ roundBankers(10.755, 2) = 10.76 **See Also** -- [round](#rounding_functions-round) +- [round](#round) ## roundToExp2 @@ -264,7 +318,7 @@ Result: ## roundDuration -Accepts a number. If the number is less than one, it returns `0`. 
Otherwise, it rounds the number down to numbers from the set of commonly used durations: `1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000`. +Accepts a number. If the number is less than one, it returns `0`. Otherwise, it rounds the number down to numbers from the set of commonly used durations: `1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000`. **Syntax** diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 342ca2b9f03..894b9026165 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -579,7 +579,6 @@ If the length of the UTF-8 byte sequence is different for upper and lower case o Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. @@ -736,7 +735,7 @@ concat(s1, s2, ...) **Arguments** -At least one value of arbitrary type. +Values of arbitrary type. Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. @@ -1136,16 +1135,136 @@ SELECT tryBase58Decode('3dc8KtHrwM') as res, tryBase58Decode('invalid') as res_i ## base64Encode -Encodes a String or FixedString as base64. +Encodes a String or FixedString as base64, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-4). Alias: `TO_BASE64`. 
+**Syntax** + +```sql +base64Encode(plaintext) +``` + +**Arguments** + +- `plaintext` — [String](../data-types/string.md) column or constant. + +**Returned value** + +- A string containing the encoded value of the argument. + +**Example** + +``` sql +SELECT base64Encode('clickhouse'); +``` + +Result: + +```result +┌─base64Encode('clickhouse')─┐ +│ Y2xpY2tob3VzZQ== │ +└────────────────────────────┘ +``` + +## base64URLEncode + +Encodes a URL (String or FixedString) as base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5). + +**Syntax** + +```sql +base64URLEncode(url) +``` + +**Arguments** + +- `url` — [String](../data-types/string.md) column or constant. + +**Returned value** + +- A string containing the encoded value of the argument. + +**Example** + +``` sql +SELECT base64URLEncode('http://clickhouse.com'); +``` + +Result: + +```result +┌─base64URLEncode('http://clickhouse.com')─┐ +│ aHR0cDovL2NsaWNraG91c2UuY29t │ +└──────────────────────────────────────────┘ +``` + ## base64Decode -Decodes a base64-encoded String or FixedString. Throws an exception in case of error. +Accepts a String and decodes it from base64, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-4). Throws an exception in case of an error. Alias: `FROM_BASE64`. +**Syntax** + +```sql +base64Decode(encoded) +``` + +**Arguments** + +- `encoded` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, an exception is thrown. + +**Returned value** + +- A string containing the decoded value of the argument. 
+ +**Example** + +``` sql +SELECT base64Decode('Y2xpY2tob3VzZQ=='); +``` + +Result: + +```result +┌─base64Decode('Y2xpY2tob3VzZQ==')─┐ +│ clickhouse │ +└──────────────────────────────────┘ +``` + +## base64URLDecode + +Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5). Throws an exception in case of an error. + +**Syntax** + +```sql +base64URLDecode(encodedURL) +``` + +**Arguments** + +- `encodedURL` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown. + +**Returned value** + +- A string containing the decoded value of the argument. + +**Example** + +``` sql +SELECT base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t'); +``` + +Result: + +```result +┌─base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐ +│ http://clickhouse.com │ +└─────────────────────────────────────────────────┘ +``` + ## tryBase64Decode Like `base64Decode` but returns an empty string in case of error. @@ -1156,9 +1275,13 @@ Like `base64Decode` but returns an empty string in case of error. tryBase64Decode(encoded) ``` -**Parameters** +**Arguments** -- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error. +- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string. + +**Returned value** + +- A string containing the decoded value of the argument. 
**Examples** @@ -1169,9 +1292,41 @@ SELECT tryBase64Decode('RW5jb2RlZA==') as res, tryBase64Decode('invalid') as res ``` ```response -┌─res─────┬─res_invalid─┐ -│ Encoded │ │ -└─────────┴─────────────┘ +┌─res─────┬─res_invalid─┐ +│ Encoded │ │ +└─────────┴─────────────┘ +``` + +## tryBase64URLDecode + +Like `base64URLDecode` but returns an empty string in case of error. + +**Syntax** + +```sql +tryBase64URLDecode(encodedURL) +``` + +**Arguments** + +- `encodedURL`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string. + +**Returned value** + +- A string containing the decoded value of the argument. + +**Examples** + +Query: + +```sql +SELECT tryBase64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64URLDecode('aHR0cHM6Ly9jbGlja') as res_invalid; +``` + +```response +┌─res───────────────────┬─res_invalid─┐ +│ http://clickhouse.com │ │ +└───────────────────────┴─────────────┘ ``` ## endsWith {#endswith} @@ -1994,7 +2149,7 @@ Result: ## stringJaccardIndexUTF8 -Like [stringJaccardIndex](#stringJaccardIndex) but for UTF8-encoded strings. +Like [stringJaccardIndex](#stringjaccardindex) but for UTF8-encoded strings. ## editDistance @@ -2022,6 +2177,32 @@ Result: Alias: levenshteinDistance +## editDistanceUTF8 + +Calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. + +**Syntax** + +```sql +editDistanceUTF8(string1, string2) +``` + +**Examples** + +``` sql +SELECT editDistanceUTF8('我是谁', '我是我'); +``` + +Result: + +``` text +┌─editDistanceUTF8('我是谁', '我是我')──┐ +│ 1 │ +└─────────────────────────────────────┘ +``` + +Alias: levenshteinDistanceUTF8 + ## damerauLevenshteinDistance Calculates the [Damerau-Levenshtein distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance) between two byte strings. 
diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index d261cff3580..b7ba1d4feb7 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -262,7 +262,7 @@ Result: ## multiSearchAllPositionsUTF8 -Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings. +Like [multiSearchAllPositions](#multisearchallpositions) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings. **Syntax** @@ -336,7 +336,7 @@ Result: Like [`position`](#position) but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings. -Functions [`multiSearchFirstPositionCaseInsensitive`](#multiSearchFirstPositionCaseInsensitive), [`multiSearchFirstPositionUTF8`](#multiSearchFirstPositionUTF8) and [`multiSearchFirstPositionCaseInsensitiveUTF8`](#multiSearchFirstPositionCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function. +Functions [`multiSearchFirstPositionCaseInsensitive`](#multisearchfirstpositioncaseinsensitive), [`multiSearchFirstPositionUTF8`](#multisearchfirstpositionutf8) and [`multiSearchFirstPositionCaseInsensitiveUTF8`](#multisearchfirstpositioncaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function. **Syntax** @@ -370,7 +370,7 @@ Result: ## multiSearchFirstPositionCaseInsensitive -Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but ignores case. +Like [`multiSearchFirstPosition`](#multisearchfirstposition) but ignores case. **Syntax** @@ -404,7 +404,7 @@ Result: ## multiSearchFirstPositionUTF8 -Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings. 
+Like [`multiSearchFirstPosition`](#multisearchfirstposition) but assumes `haystack` and `needle` to be UTF-8 strings. **Syntax** @@ -440,7 +440,7 @@ Result: ## multiSearchFirstPositionCaseInsensitiveUTF8 -Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings and ignores case. +Like [`multiSearchFirstPosition`](#multisearchfirstposition) but assumes `haystack` and `needle` to be UTF-8 strings and ignores case. **Syntax** @@ -478,7 +478,7 @@ Result: Returns the index `i` (starting from 1) of the leftmost found needlei in the string `haystack` and 0 otherwise. -Functions [`multiSearchFirstIndexCaseInsensitive`](#multiSearchFirstIndexCaseInsensitive), [`multiSearchFirstIndexUTF8`](#multiSearchFirstIndexUTF8) and [`multiSearchFirstIndexCaseInsensitiveUTF8`](#multiSearchFirstIndexCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function. +Functions [`multiSearchFirstIndexCaseInsensitive`](#multisearchfirstindexcaseinsensitive), [`multiSearchFirstIndexUTF8`](#multisearchfirstindexutf8) and [`multiSearchFirstIndexCaseInsensitiveUTF8`](#multisearchfirstindexcaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function. **Syntax** @@ -615,7 +615,7 @@ Result: Returns 1, if at least one string needlei matches the string `haystack` and 0 otherwise. -Functions [`multiSearchAnyCaseInsensitive`](#multiSearchAnyCaseInsensitive), [`multiSearchAnyUTF8`](#multiSearchAnyUTF8) and []`multiSearchAnyCaseInsensitiveUTF8`](#multiSearchAnyCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function. +Functions [`multiSearchAnyCaseInsensitive`](#multisearchanycaseinsensitive), [`multiSearchAnyUTF8`](#multisearchanyutf8) and [`multiSearchAnyCaseInsensitiveUTF8`](#multisearchanycaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function. 
**Syntax** @@ -719,7 +719,7 @@ Result: ## multiSearchAnyCaseInsensitiveUTF8 -Like [multiSearchAnyUTF8](#multiSearchAnyUTF8) but ignores case. +Like [multiSearchAnyUTF8](#multisearchanyutf8) but ignores case. *Syntax** @@ -880,7 +880,7 @@ extractAll(haystack, pattern) Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where the first array includes all fragments matching the first group, the second array - matching the second group, etc. -This function is slower than [extractAllGroupsVertical](#extractallgroups-vertical). +This function is slower than [extractAllGroupsVertical](#extractallgroupsvertical). **Syntax** @@ -952,7 +952,7 @@ Result: └────────────────────────────────────────────────────────────────────────────────────────┘ ``` -## like {#like} +## like Returns whether string `haystack` matches the LIKE expression `pattern`. @@ -1215,7 +1215,7 @@ Result: ## ngramSearchCaseInsensitive -Provides a case-insensitive variant of [ngramSearch](#ngramSearch). +Provides a case-insensitive variant of [ngramSearch](#ngramsearch). **Syntax** @@ -1630,7 +1630,7 @@ Result: ## hasSubsequenceCaseInsensitive -Like [hasSubsequence](#hasSubsequence) but searches case-insensitively. +Like [hasSubsequence](#hassubsequence) but searches case-insensitively. **Syntax** @@ -1665,7 +1665,7 @@ Result: ## hasSubsequenceUTF8 -Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings. +Like [hasSubsequence](#hassubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings. **Syntax** @@ -1700,7 +1700,7 @@ Result: ## hasSubsequenceCaseInsensitiveUTF8 -Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively. +Like [hasSubsequenceUTF8](#hassubsequenceutf8) but searches case-insensitively. 
**Syntax** diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 5dd1d5ceebe..61e84ca72d1 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -10,7 +10,7 @@ sidebar_label: Type Conversion ClickHouse generally uses the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion). -`to` functions and [cast](#castx-t) behave differently in some cases, for example in case of [LowCardinality](../data-types/lowcardinality.md): [cast](#castx-t) removes [LowCardinality](../data-types/lowcardinality.md) trait `to` functions don't. The same with [Nullable](../data-types/nullable.md), this behaviour is not compatible with SQL standard, and it can be changed using [cast_keep_nullable](../../operations/settings/settings.md/#cast_keep_nullable) setting. +`to` functions and [cast](#cast) behave differently in some cases, for example in case of [LowCardinality](../data-types/lowcardinality.md): [cast](#cast) removes [LowCardinality](../data-types/lowcardinality.md) trait `to` functions don't. The same with [Nullable](../data-types/nullable.md), this behaviour is not compatible with SQL standard, and it can be changed using [cast_keep_nullable](../../operations/settings/settings.md/#cast_keep_nullable) setting. :::note Be aware of potential data loss if values of a datatype are converted to a smaller datatype (for example from `Int64` to `Int32`) or between @@ -70,7 +70,7 @@ Integer value in the `Int8`, `Int16`, `Int32`, `Int64`, `Int128` or `Int256` dat Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. -The behavior of functions for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. 
Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. +The behavior of functions for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#common-issues-with-data-conversion), when using the functions. **Example** @@ -169,7 +169,7 @@ Converts an input value to the [UInt](../data-types/int-uint.md) data type. This Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. -The behavior of functions for negative arguments and for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. +The behavior of functions for negative arguments and for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#common-issues-with-data-conversion), when using the functions. **Example** @@ -996,7 +996,7 @@ Result: ## reinterpretAsUInt8 -Performs byte reinterpretation by treating the input value as a value of type UInt8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. +Performs byte reinterpretation by treating the input value as a value of type UInt8. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. 
**Syntax** @@ -1034,7 +1034,7 @@ Result: ## reinterpretAsUInt16 -Performs byte reinterpretation by treating the input value as a value of type UInt16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. +Performs byte reinterpretation by treating the input value as a value of type UInt16. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1072,7 +1072,7 @@ Result: ## reinterpretAsUInt32 -Performs byte reinterpretation by treating the input value as a value of type UInt32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. +Performs byte reinterpretation by treating the input value as a value of type UInt32. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1110,7 +1110,7 @@ Result: ## reinterpretAsUInt64 -Performs byte reinterpretation by treating the input value as a value of type UInt64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. +Performs byte reinterpretation by treating the input value as a value of type UInt64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1148,7 +1148,7 @@ Result: ## reinterpretAsUInt128 -Performs byte reinterpretation by treating the input value as a value of type UInt128. 
Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. +Performs byte reinterpretation by treating the input value as a value of type UInt128. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1186,7 +1186,7 @@ Result: ## reinterpretAsUInt256 -Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. +Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1224,7 +1224,7 @@ Result: ## reinterpretAsInt8 -Performs byte reinterpretation by treating the input value as a value of type Int8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. +Performs byte reinterpretation by treating the input value as a value of type Int8. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1262,7 +1262,7 @@ Result: ## reinterpretAsInt16 -Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. 
+Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1300,7 +1300,7 @@ Result: ## reinterpretAsInt32 -Performs byte reinterpretation by treating the input value as a value of type Int32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. +Performs byte reinterpretation by treating the input value as a value of type Int32. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1338,7 +1338,7 @@ Result: ## reinterpretAsInt64 -Performs byte reinterpretation by treating the input value as a value of type Int64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. +Performs byte reinterpretation by treating the input value as a value of type Int64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1376,7 +1376,7 @@ Result: ## reinterpretAsInt128 -Performs byte reinterpretation by treating the input value as a value of type Int128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. +Performs byte reinterpretation by treating the input value as a value of type Int128. 
Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1414,7 +1414,7 @@ Result: ## reinterpretAsInt256 -Performs byte reinterpretation by treating the input value as a value of type Int256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. +Performs byte reinterpretation by treating the input value as a value of type Int256. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1452,7 +1452,7 @@ Result: ## reinterpretAsFloat32 -Performs byte reinterpretation by treating the input value as a value of type Float32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. +Performs byte reinterpretation by treating the input value as a value of type Float32. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1486,7 +1486,7 @@ Result: ## reinterpretAsFloat64 -Performs byte reinterpretation by treating the input value as a value of type Float64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. +Performs byte reinterpretation by treating the input value as a value of type Float64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. 
**Syntax** @@ -1730,7 +1730,7 @@ Result: └─────────────────────┘ ``` -## reinterpret(x, T) +## reinterpret Uses the same source in-memory bytes sequence for `x` value and reinterprets it to destination type. @@ -1766,9 +1766,9 @@ Result: └─────────────┴──────────────┴───────────────┘ ``` -## CAST(x, T) +## CAST -Converts an input value to the specified data type. Unlike the [reinterpret](#type_conversion_function-reinterpret) function, `CAST` tries to present the same value using the new data type. If the conversion can not be done then an exception is raised. +Converts an input value to the specified data type. Unlike the [reinterpret](#reinterpret) function, `CAST` tries to present the same value using the new data type. If the conversion can not be done then an exception is raised. Several syntax variants are supported. **Syntax** @@ -1875,7 +1875,7 @@ Result: Converts `x` to the `T` data type. -The difference from [cast(x, T)](#type_conversion_function-cast) is that `accurateCast` does not allow overflow of numeric types during cast if type value `x` does not fit the bounds of type `T`. For example, `accurateCast(-1, 'UInt8')` throws an exception. +The difference from [cast](#cast) is that `accurateCast` does not allow overflow of numeric types during cast if type value `x` does not fit the bounds of type `T`. For example, `accurateCast(-1, 'UInt8')` throws an exception. **Example** @@ -2061,7 +2061,7 @@ Result: └───────────────────────────┴──────────────────────────────┘ ``` -## parseDateTime {#type_conversion_functions-parseDateTime} +## parseDateTime Converts a [String](../data-types/string.md) to [DateTime](../data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format). @@ -2102,15 +2102,15 @@ Alias: `TO_TIMESTAMP`. 
## parseDateTimeOrZero -Same as for [parseDateTime](#type_conversion_functions-parseDateTime) except that it returns zero date when it encounters a date format that cannot be processed. +Same as for [parseDateTime](#parsedatetime) except that it returns zero date when it encounters a date format that cannot be processed. ## parseDateTimeOrNull -Same as for [parseDateTime](#type_conversion_functions-parseDateTime) except that it returns `NULL` when it encounters a date format that cannot be processed. +Same as for [parseDateTime](#parsedatetime) except that it returns `NULL` when it encounters a date format that cannot be processed. Alias: `str_to_date`. -## parseDateTimeInJodaSyntax {#type_conversion_functions-parseDateTimeInJodaSyntax} +## parseDateTimeInJodaSyntax Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax. @@ -2151,11 +2151,11 @@ SELECT parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss', ' ## parseDateTimeInJodaSyntaxOrZero -Same as for [parseDateTimeInJodaSyntax](#type_conversion_functions-parseDateTimeInJodaSyntax) except that it returns zero date when it encounters a date format that cannot be processed. +Same as for [parseDateTimeInJodaSyntax](#parsedatetimeinjodasyntax) except that it returns zero date when it encounters a date format that cannot be processed. ## parseDateTimeInJodaSyntaxOrNull -Same as for [parseDateTimeInJodaSyntax](#type_conversion_functions-parseDateTimeInJodaSyntax) except that it returns `NULL` when it encounters a date format that cannot be processed. +Same as for [parseDateTimeInJodaSyntax](#parsedatetimeinjodasyntax) except that it returns `NULL` when it encounters a date format that cannot be processed. 
## parseDateTimeBestEffort ## parseDateTime32BestEffort @@ -2313,11 +2313,11 @@ Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it r ## parseDateTimeBestEffortUSOrNull -Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns `NULL` when it encounters a date format that cannot be processed. +Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortus) function except that it returns `NULL` when it encounters a date format that cannot be processed. ## parseDateTimeBestEffortUSOrZero -Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns zero date (`1970-01-01`) or zero date with time (`1970-01-01 00:00:00`) when it encounters a date format that cannot be processed. +Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortus) function except that it returns zero date (`1970-01-01`) or zero date with time (`1970-01-01 00:00:00`) when it encounters a date format that cannot be processed. ## parseDateTime64BestEffort @@ -2389,7 +2389,7 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that Converts input parameter to the [LowCardinality](../data-types/lowcardinality.md) version of same data type. -To convert data from the `LowCardinality` data type use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`. +To convert data from the `LowCardinality` data type use the [CAST](#cast) function. For example, `CAST(x as String)`. **Syntax** @@ -2423,11 +2423,7 @@ Result: ## toUnixTimestamp64Milli -## toUnixTimestamp64Micro - -## toUnixTimestamp64Nano - -Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Input value is scaled up or down appropriately depending on it precision. +Converts a `DateTime64` to a `Int64` value with fixed millisecond precision. The input value is scaled up or down appropriately depending on its precision. 
:::note The output value is a timestamp in UTC, not in the timezone of `DateTime64`. @@ -2437,24 +2433,22 @@ The output value is a timestamp in UTC, not in the timezone of `DateTime64`. ```sql toUnixTimestamp64Milli(value) -toUnixTimestamp64Micro(value) -toUnixTimestamp64Nano(value) ``` **Arguments** -- `value` — DateTime64 value with any precision. +- `value` — DateTime64 value with any precision. [DateTime64](../data-types/datetime64.md). **Returned value** -- `value` converted to the `Int64` data type. +- `value` converted to the `Int64` data type. [Int64](../data-types/int-uint.md). -**Examples** +**Example** Query: ```sql -WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 +WITH toDateTime64('2009-02-13 23:31:31.011', 3, 'UTC') AS dt64 SELECT toUnixTimestamp64Milli(dt64); ``` @@ -2462,14 +2456,77 @@ Result: ```response ┌─toUnixTimestamp64Milli(dt64)─┐ -│ 1568650812345 │ +│ 1234567891011 │ └──────────────────────────────┘ ``` +## toUnixTimestamp64Micro + +Converts a `DateTime64` to a `Int64` value with fixed microsecond precision. The input value is scaled up or down appropriately depending on its precision. + +:::note +The output value is a timestamp in UTC, not in the timezone of `DateTime64`. +::: + +**Syntax** + +```sql +toUnixTimestamp64Micro(value) +``` + +**Arguments** + +- `value` — DateTime64 value with any precision. [DateTime64](../data-types/datetime64.md). + +**Returned value** + +- `value` converted to the `Int64` data type. [Int64](../data-types/int-uint.md). + +**Example** + Query: -``` sql -WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 +```sql +WITH toDateTime64('1970-01-15 06:56:07.891011', 6, 'UTC') AS dt64 +SELECT toUnixTimestamp64Micro(dt64); +``` + +Result: + +```response +┌─toUnixTimestamp64Micro(dt64)─┐ +│ 1234567891011 │ +└──────────────────────────────┘ +``` + +## toUnixTimestamp64Nano + +Converts a `DateTime64` to a `Int64` value with fixed nanosecond precision. 
The input value is scaled up or down appropriately depending on its precision. + +:::note +The output value is a timestamp in UTC, not in the timezone of `DateTime64`. +::: + +**Syntax** + +```sql +toUnixTimestamp64Nano(value) +``` + +**Arguments** + +- `value` — DateTime64 value with any precision. [DateTime64](../data-types/datetime64.md). + +**Returned value** + +- `value` converted to the `Int64` data type. [Int64](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +WITH toDateTime64('1970-01-01 00:20:34.567891011', 9, 'UTC') AS dt64 SELECT toUnixTimestamp64Nano(dt64); ``` @@ -2477,34 +2534,32 @@ Result: ```response ┌─toUnixTimestamp64Nano(dt64)─┐ -│ 1568650812345678000 │ +│ 1234567891011 │ └─────────────────────────────┘ ``` ## fromUnixTimestamp64Milli -## fromUnixTimestamp64Micro +Converts an `Int64` to a `DateTime64` value with fixed millisecond precision and optional timezone. The input value is scaled up or down appropriately depending on its precision. -## fromUnixTimestamp64Nano - -Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and optional timezone. Input value is scaled up or down appropriately depending on it’s precision. Please note that input value is treated as UTC timestamp, not timestamp at given (or implicit) timezone. +:::note +Please note that input value is treated as a UTC timestamp, not timestamp at the given (or implicit) timezone. +::: **Syntax** ``` sql fromUnixTimestamp64Milli(value[, timezone]) -fromUnixTimestamp64Micro(value[, timezone]) -fromUnixTimestamp64Nano(value[, timezone]) ``` **Arguments** -- `value` — `Int64` value with any precision. -- `timezone` — `String` (optional) timezone name of the result. +- `value` — value with any precision. [Int64](../data-types/int-uint.md). +- `timezone` — (optional) timezone name of the result. [String](../data-types/string.md). **Returned value** -- `value` converted to the `DateTime64` data type. +- `value` converted to DateTime64 with precision `3`. 
[DateTime64](../data-types/datetime64.md). **Example** @@ -2512,15 +2567,101 @@ Query: ``` sql WITH CAST(1234567891011, 'Int64') AS i64 -SELECT fromUnixTimestamp64Milli(i64, 'UTC'); +SELECT + fromUnixTimestamp64Milli(i64, 'UTC') AS x, + toTypeName(x); ``` Result: ```response -┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐ -│ 2009-02-13 23:31:31.011 │ -└──────────────────────────────────────┘ +┌───────────────────────x─┬─toTypeName(x)────────┐ +│ 2009-02-13 23:31:31.011 │ DateTime64(3, 'UTC') │ +└─────────────────────────┴──────────────────────┘ +``` + +## fromUnixTimestamp64Micro + +Converts an `Int64` to a `DateTime64` value with fixed microsecond precision and optional timezone. The input value is scaled up or down appropriately depending on its precision. + +:::note +Please note that input value is treated as a UTC timestamp, not timestamp at the given (or implicit) timezone. +::: + +**Syntax** + +``` sql +fromUnixTimestamp64Micro(value[, timezone]) +``` + +**Arguments** + +- `value` — value with any precision. [Int64](../data-types/int-uint.md). +- `timezone` — (optional) timezone name of the result. [String](../data-types/string.md). + +**Returned value** + +- `value` converted to DateTime64 with precision `6`. [DateTime64](../data-types/datetime64.md). + +**Example** + +Query: + +``` sql +WITH CAST(1234567891011, 'Int64') AS i64 +SELECT + fromUnixTimestamp64Micro(i64, 'UTC') AS x, + toTypeName(x); +``` + +Result: + +```response +┌──────────────────────────x─┬─toTypeName(x)────────┐ +│ 1970-01-15 06:56:07.891011 │ DateTime64(6, 'UTC') │ +└────────────────────────────┴──────────────────────┘ +``` + +## fromUnixTimestamp64Nano + +Converts an `Int64` to a `DateTime64` value with fixed nanosecond precision and optional timezone. The input value is scaled up or down appropriately depending on its precision. + +:::note +Please note that input value is treated as a UTC timestamp, not timestamp at the given (or implicit) timezone. 
+::: + +**Syntax** + +``` sql +fromUnixTimestamp64Nano(value[, timezone]) +``` + +**Arguments** + +- `value` — value with any precision. [Int64](../data-types/int-uint.md). +- `timezone` — (optional) timezone name of the result. [String](../data-types/string.md). + +**Returned value** + +- `value` converted to DateTime64 with precision `9`. [DateTime64](../data-types/datetime64.md). + +**Example** + +Query: + +``` sql +WITH CAST(1234567891011, 'Int64') AS i64 +SELECT + fromUnixTimestamp64Nano(i64, 'UTC') AS x, + toTypeName(x); +``` + +Result: + +```response +┌─────────────────────────────x─┬─toTypeName(x)────────┐ +│ 1970-01-01 00:20:34.567891011 │ DateTime64(9, 'UTC') │ +└───────────────────────────────┴──────────────────────┘ ``` ## formatRow diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index 8b3e4f44840..76c0141ac8b 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -818,6 +818,40 @@ The same as above, but including query string and fragment. Example: `/top/news.html?page=2#comments`. +### protocol + +Extracts the protocol from a URL. + +**Syntax** + +```sql +protocol(url) +``` + +**Arguments** + +- `url` — URL to extract protocol from. [String](../data-types/string.md). + +**Returned value** + +- Protocol, or an empty string if it cannot be determined. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +SELECT protocol('https://clickhouse.com/'); +``` + +Result: + +```response +┌─protocol('https://clickhouse.com/')─┐ +│ https │ +└─────────────────────────────────────┘ +``` + ### queryString Returns the query string without the initial question mark, `#` and everything after `#`. 
diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index 0323ae728a9..e990023efbc 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -150,7 +150,7 @@ The function also works for [Arrays](array-functions.md#function-empty) and [Str **Example** -To generate the UUID value, ClickHouse provides the [generateUUIDv4](#uuid-function-generate) function. +To generate the UUID value, ClickHouse provides the [generateUUIDv4](#generateuuidv4) function. Query: @@ -190,7 +190,7 @@ The function also works for [Arrays](array-functions.md#function-notempty) or [S **Example** -To generate the UUID value, ClickHouse provides the [generateUUIDv4](#uuid-function-generate) function. +To generate the UUID value, ClickHouse provides the [generateUUIDv4](#generateuuidv4) function. Query: @@ -543,12 +543,17 @@ serverUUID() Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID). -The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. +The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 + 1 top zero bits), followed by a machine id (10 bits), and a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function `generateSnowflakeID` guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries. +:::note +The generated Snowflake IDs are based on the UNIX epoch 1970-01-01. 
+While no standard or recommendation exists for the epoch of Snowflake IDs, implementations in other systems may use a different epoch, e.g. Twitter/X (2010-11-04) or Mastodon (2015-01-01). +::: + ``` 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 @@ -605,6 +610,11 @@ SELECT generateSnowflakeID(1), generateSnowflakeID(2); ## snowflakeToDateTime +:::warning +This function is deprecated and can only be used if setting [allow_deprecated_snowflake_conversion_functions](../../operations/settings/settings.md#allow_deprecated_snowflake_conversion_functions) is enabled. +The function will be removed at some point in future. +::: + Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](../data-types/datetime.md) format. **Syntax** @@ -641,6 +651,11 @@ Result: ## snowflakeToDateTime64 +:::warning +This function is deprecated and can only be used if setting [allow_deprecated_snowflake_conversion_functions](../../operations/settings/settings.md#allow_deprecated_snowflake_conversion_functions) is enabled. +The function will be removed at some point in future. +::: + Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](../data-types/datetime64.md) format. **Syntax** @@ -677,6 +692,11 @@ Result: ## dateTimeToSnowflake +:::warning +This function is deprecated and can only be used if setting [allow_deprecated_snowflake_conversion_functions](../../operations/settings/settings.md#allow_deprecated_snowflake_conversion_functions) is enabled. +The function will be removed at some point in future. +::: + Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. 
**Syntax** @@ -711,6 +731,11 @@ Result: ## dateTime64ToSnowflake +:::warning +This function is deprecated and can only be used if setting [allow_deprecated_snowflake_conversion_functions](../../operations/settings/settings.md#allow_deprecated_snowflake_conversion_functions) is enabled. +The function will be removed at some point in future. +::: + Convert a [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. **Syntax** @@ -743,6 +768,148 @@ Result: └─────────────────────────────┘ ``` +## snowflakeIDToDateTime + +Returns the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as a value of type [DateTime](../data-types/datetime.md). + +**Syntax** + +``` sql +snowflakeIDToDateTime(value[, epoch[, time_zone]]) +``` + +**Arguments** + +- `value` — Snowflake ID. [UInt64](../data-types/int-uint.md). +- `epoch` - Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2010-11-04), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md). + +**Returned value** + +- The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value. + +**Example** + +Query: + +```sql +SELECT snowflakeIDToDateTime(7204436857747984384) AS res +``` + +Result: + +``` +┌─────────────────res─┐ +│ 2024-06-06 10:59:58 │ +└─────────────────────┘ +``` + +## snowflakeIDToDateTime64 + +Returns the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as a value of type [DateTime64](../data-types/datetime64.md). + +**Syntax** + +``` sql +snowflakeIDToDateTime64(value[, epoch[, time_zone]]) +``` + +**Arguments** + +- `value` — Snowflake ID. 
[UInt64](../data-types/int-uint.md). +- `epoch` - Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2010-11-04), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md). + +**Returned value** + +- The timestamp component of `value` as a [DateTime64](../data-types/datetime64.md) with scale = 3, i.e. millisecond precision. + +**Example** + +Query: + +```sql +SELECT snowflakeIDToDateTime64(7204436857747984384) AS res +``` + +Result: + +``` +┌─────────────────────res─┐ +│ 2024-06-06 10:59:58.851 │ +└─────────────────────────┘ +``` + +## dateTimeToSnowflakeID + +Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time. + +**Syntax** + +``` sql +dateTimeToSnowflakeID(value[, epoch]) +``` + +**Arguments** + +- `value` — Date with time. [DateTime](../data-types/datetime.md). +- `epoch` - Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2010-11-04), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md). + +**Returned value** + +- Input value converted to [UInt64](../data-types/int-uint.md) as the first Snowflake ID at that time. + +**Example** + +Query: + +```sql +SELECT toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt, dateTimeToSnowflakeID(dt) AS res; +``` + +Result: + +``` +┌──────────────────dt─┬─────────────────res─┐ +│ 2021-08-15 18:57:56 │ 6832626392367104000 │ +└─────────────────────┴─────────────────────┘ +``` + +## dateTime64ToSnowflakeID + +Converts a [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time. 
+ +**Syntax** + +``` sql +dateTime64ToSnowflakeID(value[, epoch]) +``` + +**Arguments** + +- `value` — Date with time. [DateTime64](../data-types/datetime64.md). +- `epoch` - Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2010-11-04), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md). + +**Returned value** + +- Input value converted to [UInt64](../data-types/int-uint.md) as the first Snowflake ID at that time. + +**Example** + +Query: + +```sql +SELECT toDateTime('2021-08-15 18:57:56.493', 3, 'Asia/Shanghai') AS dt, dateTime64ToSnowflakeID(dt) AS res; +``` + +Result: + +``` +┌──────────────────────dt─┬─────────────────res─┐ +│ 2021-08-15 18:57:56.493 │ 6832626394434895872 │ +└─────────────────────────┴─────────────────────┘ +``` + ## See also - [dictGetUUID](../functions/ext-dict-functions.md#ext_dict_functions-other) diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md index 0257d21b30f..ed75b1802d8 100644 --- a/docs/en/sql-reference/operators/in.md +++ b/docs/en/sql-reference/operators/in.md @@ -235,7 +235,7 @@ If `some_predicate` is not selective enough, it will return a large amount of da ### Distributed Subqueries and max_parallel_replicas -When [max_parallel_replicas](#settings-max_parallel_replicas) is greater than 1, distributed queries are further transformed. +When [max_parallel_replicas](#distributed-subqueries-and-max_parallel_replicas) is greater than 1, distributed queries are further transformed. For example, the following: @@ -255,7 +255,7 @@ where `M` is between `1` and `3` depending on which replica the local query is e These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table. 
-Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if `local_table_2` does not have a sampling key, incorrect results will be produced. The same rule applies to `JOIN`. +Therefore adding the [max_parallel_replicas](#distributed-subqueries-and-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if `local_table_2` does not have a sampling key, incorrect results will be produced. The same rule applies to `JOIN`. One workaround if `local_table_2` does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`. diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index a23710b12bd..aa6f132e08e 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -108,7 +108,7 @@ ALTER TABLE visits RENAME COLUMN webBrowser TO browser CLEAR COLUMN [IF EXISTS] name IN PARTITION partition_name ``` -Resets all data in a column for a specified partition. Read more about setting the partition name in the section [How to set the partition expression](partition.md/#how-to-set-partition-expression). +Resets all data in a column for a specified partition. Read more about setting the partition name in the section [How to set the partition expression](../alter/partition.md/#how-to-set-partition-expression). If the `IF EXISTS` clause is specified, the query won’t return an error if the column does not exist. @@ -173,7 +173,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) Changing the column type is the only complex action – it changes the contents of files with data. For large tables, this may take a long time. 
-The query also can change the order of the columns using `FIRST | AFTER` clause, see [ADD COLUMN](#alter_add-column) description, but column type is mandatory in this case. +The query also can change the order of the columns using `FIRST | AFTER` clause, see [ADD COLUMN](#add-column) description, but column type is mandatory in this case. Example: diff --git a/docs/en/sql-reference/statements/alter/constraint.md b/docs/en/sql-reference/statements/alter/constraint.md index 29675f704b5..54c456f9aa2 100644 --- a/docs/en/sql-reference/statements/alter/constraint.md +++ b/docs/en/sql-reference/statements/alter/constraint.md @@ -9,8 +9,8 @@ sidebar_label: CONSTRAINT Constraints could be added or deleted using following syntax: ``` sql -ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT constraint_name CHECK expression; -ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT constraint_name; +ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression; +ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name; ``` See more on [constraints](../../../sql-reference/statements/create/table.md#constraints). 
diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 3cfb99cff83..f81d4f02e0c 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -16,7 +16,7 @@ Most `ALTER TABLE` queries modify table settings or data: - [INDEX](/docs/en/sql-reference/statements/alter/skipping-index.md) - [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md) - [TTL](/docs/en/sql-reference/statements/alter/ttl.md) -- [STATISTIC](/docs/en/sql-reference/statements/alter/statistic.md) +- [STATISTICS](/docs/en/sql-reference/statements/alter/statistics.md) - [APPLY DELETED MASK](/docs/en/sql-reference/statements/alter/apply-deleted-mask.md) :::note diff --git a/docs/en/sql-reference/statements/alter/named-collection.md b/docs/en/sql-reference/statements/alter/named-collection.md index 71d4bfadd9c..ab772fe4dcf 100644 --- a/docs/en/sql-reference/statements/alter/named-collection.md +++ b/docs/en/sql-reference/statements/alter/named-collection.md @@ -3,6 +3,10 @@ slug: /en/sql-reference/statements/alter/named-collection sidebar_label: NAMED COLLECTION --- +import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge'; + + + # ALTER NAMED COLLECTION This query intends to modify already existing named collections. diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index ce5cecf6fd6..778816f8934 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -31,7 +31,7 @@ The following operations with [partitions](/docs/en/engines/table-engines/merget ALTER TABLE table_name [ON CLUSTER cluster] DETACH PARTITION|PART partition_expr ``` -Moves all data for the specified partition to the `detached` directory. The server forgets about the detached data partition as if it does not exist. 
The server will not know about this data until you make the [ATTACH](#alter_attach-partition) query. +Moves all data for the specified partition to the `detached` directory. The server forgets about the detached data partition as if it does not exist. The server will not know about this data until you make the [ATTACH](#attach-partitionpart) query. Example: @@ -139,7 +139,7 @@ For the query to run successfully, the following conditions must be met: ALTER TABLE table2 [ON CLUSTER cluster] REPLACE PARTITION partition_expr FROM table1 ``` -This query copies the data partition from the `table1` to `table2` and replaces existing partition in the `table2`. +This query copies the data partition from `table1` to `table2` and replaces the existing partition in `table2`. The operation is atomic. Note that: @@ -252,7 +252,7 @@ Downloads a partition from another server. This query only works for the replica The query does the following: 1. Downloads the partition|part from the specified shard. In ‘path-in-zookeeper’ you must specify a path to the shard in ZooKeeper. -2. Then the query puts the downloaded data to the `detached` directory of the `table_name` table. Use the [ATTACH PARTITION\|PART](#alter_attach-partition) query to add the data to the table. +2. Then the query puts the downloaded data to the `detached` directory of the `table_name` table. Use the [ATTACH PARTITION\|PART](#attach-partitionpart) query to add the data to the table. For example: @@ -353,7 +353,7 @@ You can specify the partition expression in `ALTER ... PARTITION` queries in dif - Using the keyword `ALL`. It can be used only with DROP/DETACH/ATTACH. For example, `ALTER TABLE visits ATTACH PARTITION ALL`. - As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. 
For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`. - Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in a single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`. -- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. +- In the [ALTER ATTACH PART](#attach-partitionpart) and [DROP DETACHED PART](#drop-detached-partitionpart) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. Usage of quotes when specifying the partition depends on the type of partition expression. For example, for the `String` type, you have to specify its name in quotes (`'`). For the `Date` and `Int*` types no quotes are needed. diff --git a/docs/en/sql-reference/statements/alter/statistic.md b/docs/en/sql-reference/statements/alter/statistic.md deleted file mode 100644 index 1c2e45b23fd..00000000000 --- a/docs/en/sql-reference/statements/alter/statistic.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -slug: /en/sql-reference/statements/alter/statistic -sidebar_position: 45 -sidebar_label: STATISTIC ---- - -# Manipulating Column Statistics - -The following operations are available: - -- `ALTER TABLE [db].table ADD STATISTIC (columns list) TYPE type` - Adds statistic description to tables metadata. 
- -- `ALTER TABLE [db].table DROP STATISTIC (columns list) TYPE type` - Removes statistic description from tables metadata and deletes statistic files from disk. - -- `ALTER TABLE [db].table CLEAR STATISTIC (columns list) TYPE type` - Deletes statistic files from disk. - -- `ALTER TABLE [db.]table MATERIALIZE STATISTIC (columns list) TYPE type` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). - -The first two commands are lightweight in a sense that they only change metadata or remove files. - -Also, they are replicated, syncing statistics metadata via ZooKeeper. - -:::note -Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). -::: diff --git a/docs/en/sql-reference/statements/alter/statistics.md b/docs/en/sql-reference/statements/alter/statistics.md new file mode 100644 index 00000000000..80024781f88 --- /dev/null +++ b/docs/en/sql-reference/statements/alter/statistics.md @@ -0,0 +1,33 @@ +--- +slug: /en/sql-reference/statements/alter/statistics +sidebar_position: 45 +sidebar_label: STATISTICS +--- + +# Manipulating Column Statistics + +The following operations are available: + +- `ALTER TABLE [db].table ADD STATISTICS (columns list) TYPE (type list)` - Adds statistic description to tables metadata. + +- `ALTER TABLE [db].table MODIFY STATISTICS (columns list) TYPE (type list)` - Modifies statistic description to tables metadata. + +- `ALTER TABLE [db].table DROP STATISTICS (columns list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns. + +- `ALTER TABLE [db].table CLEAR STATISTICS (columns list)` - Deletes all statistics objects in all parts for the specified columns. 
Statistics objects can be rebuilt using `ALTER TABLE MATERIALIZE STATISTICS`. + +- `ALTER TABLE [db.]table MATERIALIZE STATISTICS (columns list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). + +The first two commands are lightweight in a sense that they only change metadata or remove files. + +Also, they are replicated, syncing statistics metadata via ZooKeeper. + +Here is an example of adding two statistics types to two columns: + +``` +ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq; +``` + +:::note +Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). +::: diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index b5c156f56a9..6216b83c2ef 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -12,7 +12,7 @@ Syntax: ``` sql ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1] [, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...] - [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] + [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'}] [[ADD | DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] 
| ANY | NONE] [VALID UNTIL datetime] [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md index fb7a5bd7c03..5f3dae0a9c0 100644 --- a/docs/en/sql-reference/statements/alter/view.md +++ b/docs/en/sql-reference/statements/alter/view.md @@ -134,8 +134,8 @@ PRIMARY KEY (event_type, ts) ORDER BY (event_type, ts, browser) SETTINGS index_granularity = 8192 --- !!! The columns' definition is unchanged but it does not matter, we are not quering --- MATERIALIZED VIEW, we are quering TO (storage) table. +-- !!! The columns' definition is unchanged but it does not matter, we are not querying +-- MATERIALIZED VIEW, we are querying TO (storage) table. -- SELECT section is updated. SHOW CREATE TABLE mv FORMAT TSVRaw; diff --git a/docs/en/sql-reference/statements/create/named-collection.md b/docs/en/sql-reference/statements/create/named-collection.md index f69fa2e3678..a4e146c814c 100644 --- a/docs/en/sql-reference/statements/create/named-collection.md +++ b/docs/en/sql-reference/statements/create/named-collection.md @@ -3,6 +3,10 @@ slug: /en/sql-reference/statements/create/named-collection sidebar_label: NAMED COLLECTION --- +import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge'; + + + # CREATE NAMED COLLECTION Creates a new named collection. diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 628fe1d2875..0253bc647e6 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -17,8 +17,8 @@ By default, tables are created only on the current server. 
Distributed DDL queri ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1] [COMMENT 'comment for column'], - name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2] [COMMENT 'comment for column'], + name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [COMMENT 'comment for column'] [compression_codec] [TTL expr1], + name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [COMMENT 'comment for column'] [compression_codec] [TTL expr2], ... ) ENGINE = engine COMMENT 'comment for table' diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index aee98cfcd10..8c9143ee086 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -12,7 +12,7 @@ Syntax: ``` sql CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] 
- [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}] + [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [VALID UNTIL datetime] [IN access_storage_type] diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 1bdf22b35b0..1fabb6d8cc7 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -6,7 +6,7 @@ sidebar_label: VIEW # CREATE VIEW -Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-experimental), and [window](#window-view-experimental) (live view and window view are experimental features). +Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-deprecated), and [window](#window-view-experimental) (live view and window view are experimental features). 
## Normal View diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 2850ce71781..43fa344a16d 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -33,7 +33,7 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US - `role` — ClickHouse user role. - `user` — ClickHouse user account. -The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`. +The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option) privilege to `user` or `role`. The `WITH REPLACE OPTION` clause replace old roles by new role for the `user` or `role`, if is not specified it appends roles. ## Grant Current Grants Syntax @@ -201,7 +201,7 @@ Hierarchy of privileges: - `HDFS` - `S3` - [dictGet](#dictget) -- [displaySecretsInShowAndSelect](#display-secrets) +- [displaySecretsInShowAndSelect](#displaysecretsinshowandselect) - [NAMED COLLECTION ADMIN](#named-collection-admin) - `CREATE NAMED COLLECTION` - `DROP NAMED COLLECTION` @@ -498,7 +498,7 @@ Privilege level: `DICTIONARY`. 
- `GRANT dictGet ON mydictionary TO john` -### displaySecretsInShowAndSelect {#display-secrets} +### displaySecretsInShowAndSelect Allows a user to view secrets in `SHOW` and `SELECT` queries if both [`display_secrets_in_show_and_select` server setting](../../operations/server-configuration-parameters/settings#display_secrets_in_show_and_select) diff --git a/docs/en/sql-reference/statements/select/sample.md b/docs/en/sql-reference/statements/select/sample.md index 137f86cc8b9..78e05b19bd1 100644 --- a/docs/en/sql-reference/statements/select/sample.md +++ b/docs/en/sql-reference/statements/select/sample.md @@ -27,14 +27,14 @@ The features of data sampling are listed below: For the `SAMPLE` clause the following syntax is supported: -| SAMPLE Clause Syntax | Description | -|----------------------|------------------------------| -| `SAMPLE k` | Here `k` is the number from 0 to 1. The query is executed on `k` fraction of data. For example, `SAMPLE 0.1` runs the query on 10% of data. [Read more](#select-sample-k) | -| `SAMPLE n` | Here `n` is a sufficiently large integer. The query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows. [Read more](#select-sample-n) | -| `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1. The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#select-sample-offset) | +| SAMPLE Clause Syntax | Description | +|----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `SAMPLE k` | Here `k` is the number from 0 to 1. The query is executed on `k` fraction of data. For example, `SAMPLE 0.1` runs the query on 10% of data. 
[Read more](#sample-k) | +| `SAMPLE n` | Here `n` is a sufficiently large integer. The query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows. [Read more](#sample-n) | +| `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1. The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#sample-k-offset-m) | -## SAMPLE K {#select-sample-k} +## SAMPLE K Here `k` is the number from 0 to 1 (both fractional and decimal notations are supported). For example, `SAMPLE 1/2` or `SAMPLE 0.5`. @@ -54,7 +54,7 @@ ORDER BY PageViews DESC LIMIT 1000 In this example, the query is executed on a sample from 0.1 (10%) of data. Values of aggregate functions are not corrected automatically, so to get an approximate result, the value `count()` is manually multiplied by 10. -## SAMPLE N {#select-sample-n} +## SAMPLE N Here `n` is a sufficiently large integer. For example, `SAMPLE 10000000`. @@ -90,7 +90,7 @@ FROM visits SAMPLE 10000000 ``` -## SAMPLE K OFFSET M {#select-sample-offset} +## SAMPLE K OFFSET M Here `k` and `m` are numbers from 0 to 1. Examples are shown below. diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 7efbff1b42b..e6d3439d2b9 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -174,7 +174,7 @@ Aborts ClickHouse process (like `kill -9 {$ pid_clickhouse-server}`) ## Managing Distributed Tables -ClickHouse can manage [distributed](../../engines/table-engines/special/distributed.md) tables. When a user inserts data into these tables, ClickHouse first creates a queue of the data that should be sent to cluster nodes, then asynchronously sends it. 
You can manage queue processing with the [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [FLUSH DISTRIBUTED](#query_language-system-flush-distributed), and [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) queries. You can also synchronously insert distributed data with the [distributed_foreground_insert](../../operations/settings/settings.md#distributed_foreground_insert) setting. +ClickHouse can manage [distributed](../../engines/table-engines/special/distributed.md) tables. When a user inserts data into these tables, ClickHouse first creates a queue of the data that should be sent to cluster nodes, then asynchronously sends it. You can manage queue processing with the [STOP DISTRIBUTED SENDS](#stop-distributed-sends), [FLUSH DISTRIBUTED](#flush-distributed), and [START DISTRIBUTED SENDS](#start-distributed-sends) queries. You can also synchronously insert distributed data with the [distributed_foreground_insert](../../operations/settings/settings.md#distributed_foreground_insert) setting. ### STOP DISTRIBUTED SENDS diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index fc0286e76ad..6a4afb63db8 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -54,11 +54,11 @@ Identifiers are: - Cluster, database, table, partition, and column names. - Functions. - Data types. -- [Expression aliases](#expression_aliases). +- [Expression aliases](#expression-aliases). Identifiers can be quoted or non-quoted. The latter is preferred. -Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and can not be equal to [keywords](#syntax-keywords). Examples: `x`, `_1`, `X_y__Z123_`. +Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and can not be equal to [keywords](#keywords). Examples: `x`, `_1`, `X_y__Z123_`. 
If you want to use identifiers the same as keywords or you want to use other symbols in identifiers, quote it using double quotes or backticks, for example, `"id"`, `` `id` ``. diff --git a/docs/en/sql-reference/table-functions/azureBlobStorage.md b/docs/en/sql-reference/table-functions/azureBlobStorage.md index 1510489ce83..f59fedeb3a2 100644 --- a/docs/en/sql-reference/table-functions/azureBlobStorage.md +++ b/docs/en/sql-reference/table-functions/azureBlobStorage.md @@ -72,6 +72,7 @@ SELECT count(*) FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountNam - `_path` — Path to the file. Type: `LowCardinalty(String)`. - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. **See Also** diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index f66178afbb2..3a3162dad9a 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -18,7 +18,7 @@ file([path_to_archive ::] path [,format] [,structure] [,compression]) **Parameters** -- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports in read-only mode the following [globs](#globs_in_path): `*`, `?`, `{abc,def}` (with `'abc'` and `'def'` being strings) and `{N..M}` (with `N` and `M` being numbers). +- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). 
Supports in read-only mode the following [globs](#globs-in-path): `*`, `?`, `{abc,def}` (with `'abc'` and `'def'` being strings) and `{N..M}` (with `N` and `M` being numbers). - `path_to_archive` - The relative path to a zip/tar/7z archive. Supports the same globs as `path`. - `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`. @@ -128,7 +128,7 @@ Reading data from `table.csv`, located in `archive1.zip` or/and `archive2.zip`: SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv'); ``` -## Globs in path {#globs_in_path} +## Globs in path Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix. @@ -196,6 +196,7 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3 - `_path` — Path to the file. Type: `LowCardinalty(String)`. - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. ## Settings {#settings} diff --git a/docs/en/sql-reference/table-functions/fileCluster.md b/docs/en/sql-reference/table-functions/fileCluster.md index 4677d2883a7..62b00fadd62 100644 --- a/docs/en/sql-reference/table-functions/fileCluster.md +++ b/docs/en/sql-reference/table-functions/fileCluster.md @@ -22,7 +22,7 @@ fileCluster(cluster_name, path[, format, structure, compression_method]) **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). 
Path to file also supports [globs](#globs_in_path). +- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file also supports [globs](#globs-in-path). - `format` — [Format](../../interfaces/formats.md#formats) of the files. Type: [String](../../sql-reference/data-types/string.md). - `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md). - `compression_method` — Compression method. Supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`. @@ -74,7 +74,7 @@ SELECT * FROM fileCluster('my_cluster', 'file{1,2}.csv', 'CSV', 'i UInt32, s Str ``` -## Globs in Path {#globs_in_path} +## Globs in Path All patterns supported by [File](../../sql-reference/table-functions/file.md#globs-in-path) table function are supported by FileCluster. diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index d65615e7588..28cba5ccc6a 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -97,6 +97,7 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin - `_path` — Path to the file. Type: `LowCardinalty(String)`. - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. 
## Storage Settings {#storage-settings} diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index cbef80371a3..35e5d86034c 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -269,9 +269,10 @@ FROM s3( ## Virtual Columns {#virtual-columns} -- `_path` — Path to the file. Type: `LowCardinalty(String)`. -- `_file` — Name of the file. Type: `LowCardinalty(String)`. -- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. +- `_path` — Path to the file. Type: `LowCardinality(String)`. In case of an archive, shows the path in the format: "{path_to_archive}::{path_to_file_inside_archive}" +- `_file` — Name of the file. Type: `LowCardinality(String)`. In case of an archive, shows the name of the file inside the archive. +- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In case of an archive, shows the uncompressed size of the file inside the archive. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. ## Storage Settings {#storage-settings} diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 4dc6e435b50..3bb7aff53a7 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -53,6 +53,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it - `_path` — Path to the `URL`. Type: `LowCardinalty(String)`. - `_file` — Resource name of the `URL`. Type: `LowCardinalty(String)`. - `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. 
## Storage Settings {#storage-settings} diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 32ebc6d028f..49076f3cbe1 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -36,7 +36,7 @@ Finds non-negative derivative for given `metric_column` by `timestamp_column`. `INTERVAL` can be omitted, default is `INTERVAL 1 SECOND`. The computed value is the following for each row: - `0` for 1st row, -- ${metric_i - metric_{i-1} \over timestamp_i - timestamp_{i-1}} * interval$ for $i_th$ row. +- ${\text{metric}_i - \text{metric}_{i-1} \over \text{timestamp}_i - \text{timestamp}_{i-1}} * \text{interval}$ for $i_{th}$ row. ## Syntax @@ -80,8 +80,8 @@ These functions can be used only as a window function. - `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - `rank()` - Rank the current row within its partition with gaps. - `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. -- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +- `lagInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the previous row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned. +- `leadInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. If offset is not provided, it defaults to 1. 
If the offset leads to a position outside the window frame, the specified default value is used. ## Examples diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 01ff4dd5f28..bf42edf89ff 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -283,7 +283,7 @@ Pull request можно создать, даже если работа над з Тесты будут запущены, как только сотрудники ClickHouse поставят для pull request тег «Can be tested». Результаты первых проверок (стиль кода) появятся уже через несколько минут. Результаты сборки появятся примерно через пол часа. Результаты основного набора тестов будут доступны в пределах часа. -Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку «Details» у проверки «Clickhouse build check». Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно). +Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку «Details» у проверки «Builds». Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно). Вероятнее всего, часть сборок не будет успешной с первого раза. Ведь мы проверяем сборку кода и gcc и clang, а при сборке с помощью clang включаются почти все существующие в природе warnings (всегда с флагом `-Werror`). На той же странице, вы сможете найти логи сборки - вам не обязательно самому собирать ClickHouse всеми возможными способами. 
diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md index 4d19cf50ae1..86eeaac2da7 100644 --- a/docs/ru/interfaces/cli.md +++ b/docs/ru/interfaces/cli.md @@ -141,6 +141,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe - `--secure` — если указано, будет использован безопасный канал. - `--history_file` - путь к файлу с историей команд. - `--param_` — значение параметра для [запроса с параметрами](#cli-queries-with-parameters). +- `--jwt` – авторизация с использованием JSON Web Token. Доступно только в ClickHouse Cloud. Вместо параметров `--host`, `--port`, `--user` и `--password` клиент ClickHouse также поддерживает строки подключения (смотри следующий раздел). diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index 5f11f1b430b..d9da51892f9 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -414,6 +414,8 @@ $ curl -v 'http://localhost:8123/predefined_query' - `content_type` — используется со всеми типами, возвращает [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type). + - `http_response_headers` — используется со всеми типами чтобы добавить кастомные хедеры в ответ. Может использоваться в том числе для задания хедера `Content-Type` вместо `content_type`. + - `response_content` — используется с типом`static`, содержимое ответа, отправленное клиенту, при использовании префикса ‘file://’ or ‘config://’, находит содержимое из файла или конфигурации, отправленного клиенту. Далее приведены методы настройки для различных типов. @@ -509,6 +511,33 @@ max_final_threads 2 static 402 text/html; charset=UTF-8 + + en + 43 + + Say Hi! + + + +
+``` + +`http_response_headers` так же может использоваться для определения `Content-Type` вместо `content_type`. + +``` xml + + + GET + xxx + /hi + + static + 402 + + text/html; charset=UTF-8 + en + 43 + Say Hi! @@ -589,6 +618,9 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler' static text/html; charset=UTF-8 + + 737060cd8c284d8af7ad3082f209582d + file:///absolute_path_file.html @@ -599,6 +631,9 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler' static text/html; charset=UTF-8 + + 737060cd8c284d8af7ad3082f209582d + file://./relative_path_file.html diff --git a/docs/ru/operations/external-authenticators/ssl-x509.md b/docs/ru/operations/external-authenticators/ssl-x509.md index affdf87b199..7f1fb03962c 100644 --- a/docs/ru/operations/external-authenticators/ssl-x509.md +++ b/docs/ru/operations/external-authenticators/ssl-x509.md @@ -3,23 +3,30 @@ slug: /ru/operations/external-authenticators/ssl-x509 --- # Аутентификация по сертификату SSL X.509 {#ssl-external-authentication} -[Опция 'strict'](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) включает обязательную проверку сертификатов входящих соединений в библиотеке `SSL`. В этом случае могут быть установлены только соединения, представившие действительный сертификат. Соединения с недоверенными сертификатами будут отвергнуты. Таким образом, проверка сертификата позволяет однозначно аутентифицировать входящее соединение. Идентификация пользователя осуществляется по полю `Common Name` сертификата. Это позволяет ассоциировать несколько сертификатов с одним и тем же пользователем. Дополнительно, перевыпуск и отзыв сертификата не требуют изменения конфигурации ClickHouse. +[Опция 'strict'](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) включает обязательную проверку сертификатов входящих соединений в библиотеке `SSL`. 
В этом случае могут быть установлены только соединения, представившие действительный сертификат. Соединения с недоверенными сертификатами будут отвергнуты. Таким образом, проверка сертификата позволяет однозначно аутентифицировать входящее соединение. Идентификация пользователя осуществляется по полю `Common Name` или `subjectAltName` сертификата. Это позволяет ассоциировать несколько сертификатов с одним и тем же пользователем. Дополнительно, перевыпуск и отзыв сертификата не требуют изменения конфигурации ClickHouse. -Для включения аутентификации по SSL сертификату, необходимо указать список `Common Name` для каждого пользователя ClickHouse в файле настройки `config.xml`: +Для включения аутентификации по SSL сертификату, необходимо указать список `Common Name` или `subjectAltName` для каждого пользователя ClickHouse в файле настройки `config.xml`: **Example** ```xml - - + + host.domain.com:example_user host.domain.com:example_user_dev - + - + + + + DNS:host.domain.com + + + + ``` diff --git a/docs/ru/sql-reference/functions/rounding-functions.md b/docs/ru/sql-reference/functions/rounding-functions.md index cc939f69afc..f1dd57505ea 100644 --- a/docs/ru/sql-reference/functions/rounding-functions.md +++ b/docs/ru/sql-reference/functions/rounding-functions.md @@ -10,7 +10,7 @@ sidebar_label: "Функции округления" Возвращает наибольшее круглое число, которое меньше или равно, чем x. Круглым называется число, кратное 1 / 10N или ближайшее к нему число соответствующего типа данных, если 1 / 10N не представимо точно. -N - целочисленная константа, не обязательный параметр. По умолчанию - ноль, что означает - округлять до целого числа. +N - целочисленный аргумент, не обязательный параметр. По умолчанию - ноль, что означает - округлять до целого числа. N может быть отрицательным. 
Примеры: `floor(123.45, 1) = 123.4, floor(123.45, -1) = 120.` diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index fc258f7b4cf..2436581fc7f 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -538,16 +538,28 @@ SELECT base58Decode('3dc8KtHrwM'); Синоним: `TO_BASE64`. +## base64URLEncode(s) + +Производит кодирование URL (String или FixedString) в base64-представление в соответствии с [RFC 4648](https://tools.ietf.org/html/rfc4648). + ## base64Decode(s) {#base64decode} Декодирует base64-представление s в исходную строку. При невозможности декодирования выбрасывает исключение Синоним: `FROM_BASE64`. +## base64URLDecode(s) + +Декодирует base64-представление URL в исходную строку в соответствии с [RFC 4648](https://tools.ietf.org/html/rfc4648). При невозможности декодирования выбрасывает исключение + ## tryBase64Decode(s) {#trybase64decode} Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку. +## tryBase64URLDecode(s) + +Функционал аналогичен base64URLDecode, но при невозможности декодирования возвращает пустую строку. + ## endsWith(s, suffix) {#endswith} Возвращает 1, если строка завершается указанным суффиксом, и 0 в противном случае. 
diff --git a/docs/ru/sql-reference/statements/alter/constraint.md b/docs/ru/sql-reference/statements/alter/constraint.md index ad5f23e5fdc..45b0f5f6350 100644 --- a/docs/ru/sql-reference/statements/alter/constraint.md +++ b/docs/ru/sql-reference/statements/alter/constraint.md @@ -11,8 +11,8 @@ sidebar_label: "Манипуляции с ограничениями" Добавить или удалить ограничение можно с помощью запросов ``` sql -ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT constraint_name CHECK expression; -ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT constraint_name; +ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression; +ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name; ``` Запросы выполняют добавление или удаление метаданных об ограничениях таблицы `[db].name`, поэтому выполняются мгновенно. diff --git a/docs/ru/sql-reference/statements/create/user.md b/docs/ru/sql-reference/statements/create/user.md index 76cfdb251dc..fac2cacf8cc 100644 --- a/docs/ru/sql-reference/statements/create/user.md +++ b/docs/ru/sql-reference/statements/create/user.md @@ -13,7 +13,7 @@ sidebar_label: "Пользователь" ``` sql CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] 
- [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'}] + [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [DEFAULT ROLE role [,...]] [DEFAULT DATABASE database | NONE] diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index 67bd681269b..d5ece5b23a9 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -201,18 +201,18 @@ ClickHouse 不要求主键唯一,所以您可以插入多条具有相同主键 主键中列的数量并没有明确的限制。依据数据结构,您可以在主键包含多些或少些列。这样可以: - - 改善索引的性能。 +- 改善索引的性能。 - - 如果当前主键是 `(a, b)` ,在下列情况下添加另一个 `c` 列会提升性能: + 如果当前主键是 `(a, b)` ,在下列情况下添加另一个 `c` 列会提升性能: - - 查询会使用 `c` 列作为条件 - - 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这样的情况很普遍。换言之,就是加入另一列后,可以让您的查询略过很长的数据范围。 + - 查询会使用 `c` 列作为条件 + - 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这样的情况很普遍。换言之,就是加入另一列后,可以让您的查询略过很长的数据范围。 - - 改善数据压缩。 +- 改善数据压缩。 - ClickHouse 以主键排序片段数据,所以,数据的一致性越高,压缩越好。 + ClickHouse 以主键排序片段数据,所以,数据的一致性越高,压缩越好。 - - 在[CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里进行数据合并时会提供额外的处理逻辑。 +- 在[CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 
[SummingMergeTree](summingmergetree.md) 引擎里进行数据合并时会提供额外的处理逻辑。 在这种情况下,指定与主键不同的 *排序键* 也是有意义的。 diff --git a/docs/zh/guides/improving-query-performance/skipping-indexes.md b/docs/zh/guides/improving-query-performance/skipping-indexes.md index f9f43e46927..8eb88d859f2 100644 --- a/docs/zh/guides/improving-query-performance/skipping-indexes.md +++ b/docs/zh/guides/improving-query-performance/skipping-indexes.md @@ -123,7 +123,7 @@ Bloom filter是一种数据结构,它允许对集合成员进行高效的是 有三种基于Bloom过滤器的数据跳数索引类型: -* 基本的**bloom_filter**接受一个可选参数,该参数表示在0到1之间允许的“假阳性”率(如果未指定,则使用.025)。 +* 基本的**bloom_filter**接受一个可选参数,该参数表示在0到1之间允许的“假阳性”率(如果未指定,则使用0.025)。 * 更专业的**tokenbf_v1**。需要三个参数,用来优化布隆过滤器:(1)过滤器的大小字节(大过滤器有更少的假阳性,有更高的存储成本),(2)哈希函数的个数(更多的散列函数可以减少假阳性)。(3)布隆过滤器哈希函数的种子。有关这些参数如何影响布隆过滤器功能的更多细节,请参阅 [这里](https://hur.st/bloomfilter/) 。此索引仅适用于String、FixedString和Map类型的数据。输入表达式被分割为由非字母数字字符分隔的字符序列。例如,列值`This is a candidate for a "full text" search`将被分割为`This` `is` `a` `candidate` `for` `full` `text` `search`。它用于LIKE、EQUALS、in、hasToken()和类似的长字符串中单词和其他值的搜索。例如,一种可能的用途是在非结构的应用程序日志行列中搜索少量的类名或行号。 diff --git a/docs/zh/sql-reference/statements/alter/constraint.md b/docs/zh/sql-reference/statements/alter/constraint.md index 86ffcf09d65..59edcf10645 100644 --- a/docs/zh/sql-reference/statements/alter/constraint.md +++ b/docs/zh/sql-reference/statements/alter/constraint.md @@ -9,8 +9,8 @@ sidebar_label: 约束 约束可以使用以下语法添加或删除: ``` sql -ALTER TABLE [db].name ADD CONSTRAINT constraint_name CHECK expression; -ALTER TABLE [db].name DROP CONSTRAINT constraint_name; +ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression; +ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name; ``` 查看[constraints](../../../sql-reference/statements/create/table.mdx#constraints)。 diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index efe23d57478..6343dc85d00 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -64,6 +64,7 @@ 
namespace ErrorCodes extern const int NETWORK_ERROR; extern const int AUTHENTICATION_FAILED; extern const int NO_ELEMENTS_IN_CONFIG; + extern const int USER_EXPIRED; } @@ -74,6 +75,12 @@ void Client::processError(const String & query) const fmt::print(stderr, "Received exception from server (version {}):\n{}\n", server_version, getExceptionMessage(*server_exception, print_stack_trace, true)); + + if (server_exception->code() == ErrorCodes::USER_EXPIRED) + { + server_exception->rethrow(); + } + if (is_interactive) { fmt::print(stderr, "\n"); @@ -241,6 +248,10 @@ std::vector Client::loadWarningMessages() } } +Poco::Util::LayeredConfiguration & Client::getClientConfiguration() +{ + return config(); +} void Client::initialize(Poco::Util::Application & self) { @@ -690,9 +701,7 @@ bool Client::processWithFuzzing(const String & full_query) const char * begin = full_query.data(); orig_ast = parseQuery(begin, begin + full_query.size(), global_context->getSettingsRef(), - /*allow_multi_statements=*/ true, - /*is_interactive=*/ is_interactive, - /*ignore_error=*/ ignore_error); + /*allow_multi_statements=*/ true); } catch (const Exception & e) { @@ -944,6 +953,7 @@ void Client::addOptions(OptionsDescription & options_description) ("ssh-key-file", po::value(), "File containing the SSH private key for authenticate with the server.") ("ssh-key-passphrase", po::value(), "Passphrase for the SSH private key specified by --ssh-key-file.") ("quota_key", po::value(), "A string to differentiate quotas when the user have keyed quotas configured on server") + ("jwt", po::value(), "Use JWT for authentication") ("max_client_network_bandwidth", po::value(), "the maximum speed of data exchange over the network for the client in bytes per second.") ("compression", po::value(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).") @@ -1102,6 +1112,12 @@ void Client::processOptions(const OptionsDescription & options_description, 
config().setBool("no-warnings", true); if (options.count("fake-drop")) config().setString("ignore_drop_queries_probability", "1"); + if (options.count("jwt")) + { + if (!options["user"].defaulted()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "User and JWT flags can't be specified together"); + config().setString("jwt", options["jwt"].as()); + } if (options.count("accept-invalid-certificate")) { config().setString("openSSL.client.invalidCertificateHandler.name", "AcceptCertificateHandler"); diff --git a/programs/client/Client.h b/programs/client/Client.h index bef948b3c1e..229608f787d 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -16,6 +16,9 @@ public: int main(const std::vector & /*args*/) override; protected: + + Poco::Util::LayeredConfiguration & getClientConfiguration() override; + bool processWithFuzzing(const String & full_query) override; std::optional processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query); diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index ebec337060c..a20c1f686f3 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -368,7 +368,7 @@ int KeeperClient::main(const std::vector & /* args */) return 0; } - DB::ConfigProcessor config_processor(config().getString("config-file", "config.xml")); + ConfigProcessor config_processor(config().getString("config-file", "config.xml")); /// This will handle a situation when clickhouse is running on the embedded config, but config.d folder is also present. ConfigProcessor::registerEmbeddedConfig("config.xml", ""); @@ -383,6 +383,9 @@ int KeeperClient::main(const std::vector & /* args */) for (const auto & key : keys) { + if (key != "node") + continue; + String prefix = "zookeeper." 
+ key; String host = clickhouse_config.configuration->getString(prefix + ".host"); String port = clickhouse_config.configuration->getString(prefix + ".port"); @@ -401,6 +404,7 @@ int KeeperClient::main(const std::vector & /* args */) zk_args.hosts.push_back(host + ":" + port); } + zk_args.availability_zones.resize(zk_args.hosts.size()); zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000; zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000; zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000; diff --git a/programs/keeper-client/Parser.cpp b/programs/keeper-client/Parser.cpp index 5b16e6d2c23..51f85cf4a69 100644 --- a/programs/keeper-client/Parser.cpp +++ b/programs/keeper-client/Parser.cpp @@ -12,8 +12,7 @@ bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result) if (!parseIdentifierOrStringLiteral(pos, expected, result)) return false; } - - while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream && pos->type != TokenType::Semicolon) + else if (pos->type == TokenType::Number) { result.append(pos->begin, pos->end); ++pos; @@ -40,8 +39,8 @@ bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) for (const auto & pair : KeeperClient::commands) expected.add(pos, pair.first.data()); - for (const auto & flwc : four_letter_word_commands) - expected.add(pos, flwc.data()); + for (const auto & four_letter_word_command : four_letter_word_commands) + expected.add(pos, four_letter_word_command.data()); if (pos->type != TokenType::BareWord) return false; diff --git a/programs/keeper-client/Parser.h b/programs/keeper-client/Parser.h index 57ee6ce4a18..503edfa4f73 100644 --- a/programs/keeper-client/Parser.h +++ b/programs/keeper-client/Parser.h @@ -11,7 +11,6 @@ namespace DB { bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result); - bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path); 
diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 22874e199be..91e162e40af 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -154,8 +154,6 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolRemoteFSReader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Storages/StorageS3Settings.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index dba5c2b7d2a..f14ef2e5552 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -355,15 +355,13 @@ try std::string include_from_path = config().getString("include_from", "/etc/metrika.xml"); - if (config().has(DB::PlacementInfo::PLACEMENT_CONFIG_PREFIX)) - { - PlacementInfo::PlacementInfo::instance().initialize(config()); - } + PlacementInfo::PlacementInfo::instance().initialize(config()); GlobalThreadPool::initialize( - config().getUInt("max_thread_pool_size", 100), - config().getUInt("max_thread_pool_free_size", 1000), - config().getUInt("thread_pool_queue_size", 10000) + /// We need to have sufficient amount of threads for connections + nuraft workers + keeper workers, 1000 is an estimation + std::min(1000U, config().getUInt("max_thread_pool_size", 1000)), + config().getUInt("max_thread_pool_free_size", 100), + config().getUInt("thread_pool_queue_size", 1000) ); /// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed). 
SCOPE_EXIT({ @@ -576,8 +574,7 @@ try #if USE_SSL CertificateReloader::instance().tryLoad(*config); #endif - }, - /* already_loaded = */ false); /// Reload it right now (initial loading) + }); SCOPE_EXIT({ LOG_INFO(log, "Shutting down."); diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index 2fca10ce4d7..86410d712ec 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -11,7 +11,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES LibraryBridgeHandlers.cpp SharedLibrary.cpp library-bridge.cpp - createFunctionBaseCast.cpp ) clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES}) @@ -20,6 +19,7 @@ target_link_libraries(clickhouse-library-bridge PRIVATE daemon dbms bridge + clickhouse_functions ) set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 4d5cfb09e6a..b33e1595056 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -59,8 +60,13 @@ # include #endif + namespace fs = std::filesystem; +namespace CurrentMetrics +{ + extern const Metric MemoryTracking; +} namespace DB { @@ -82,6 +88,11 @@ void applySettingsOverridesForLocal(ContextMutablePtr context) context->setSettings(settings); } +Poco::Util::LayeredConfiguration & LocalServer::getClientConfiguration() +{ + return config(); +} + void LocalServer::processError(const String &) const { if (ignore_error) @@ -117,20 +128,21 @@ void LocalServer::initialize(Poco::Util::Application & self) Poco::Util::Application::initialize(self); /// Load config files if exists - if (config().has("config-file") || fs::exists("config.xml")) + if (getClientConfiguration().has("config-file") || fs::exists("config.xml")) { - const auto config_path = config().getString("config-file", "config.xml"); 
+ const auto config_path = getClientConfiguration().getString("config-file", "config.xml"); ConfigProcessor config_processor(config_path, false, true); ConfigProcessor::setConfigPath(fs::path(config_path).parent_path()); auto loaded_config = config_processor.loadConfig(); - config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); + getClientConfiguration().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); } + server_settings.loadSettingsFromConfig(config()); + GlobalThreadPool::initialize( - config().getUInt("max_thread_pool_size", 10000), - config().getUInt("max_thread_pool_free_size", 1000), - config().getUInt("thread_pool_queue_size", 10000) - ); + server_settings.max_thread_pool_size, + server_settings.max_thread_pool_free_size, + server_settings.thread_pool_queue_size); #if USE_AZURE_BLOB_STORAGE /// See the explanation near the same line in Server.cpp @@ -141,18 +153,17 @@ void LocalServer::initialize(Poco::Util::Application & self) #endif getIOThreadPool().initialize( - config().getUInt("max_io_thread_pool_size", 100), - config().getUInt("max_io_thread_pool_free_size", 0), - config().getUInt("io_thread_pool_queue_size", 10000)); + server_settings.max_io_thread_pool_size, + server_settings.max_io_thread_pool_free_size, + server_settings.io_thread_pool_queue_size); - - const size_t active_parts_loading_threads = config().getUInt("max_active_parts_loading_thread_pool_size", 64); + const size_t active_parts_loading_threads = server_settings.max_active_parts_loading_thread_pool_size; getActivePartsLoadingThreadPool().initialize( active_parts_loading_threads, 0, // We don't need any threads one all the parts will be loaded active_parts_loading_threads); - const size_t outdated_parts_loading_threads = config().getUInt("max_outdated_parts_loading_thread_pool_size", 32); + const size_t outdated_parts_loading_threads = server_settings.max_outdated_parts_loading_thread_pool_size; getOutdatedPartsLoadingThreadPool().initialize( 
outdated_parts_loading_threads, 0, // We don't need any threads one all the parts will be loaded @@ -160,7 +171,7 @@ void LocalServer::initialize(Poco::Util::Application & self) getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads); - const size_t unexpected_parts_loading_threads = config().getUInt("max_unexpected_parts_loading_thread_pool_size", 32); + const size_t unexpected_parts_loading_threads = server_settings.max_unexpected_parts_loading_thread_pool_size; getUnexpectedPartsLoadingThreadPool().initialize( unexpected_parts_loading_threads, 0, // We don't need any threads one all the parts will be loaded @@ -168,7 +179,7 @@ void LocalServer::initialize(Poco::Util::Application & self) getUnexpectedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads); - const size_t cleanup_threads = config().getUInt("max_parts_cleaning_thread_pool_size", 128); + const size_t cleanup_threads = server_settings.max_parts_cleaning_thread_pool_size; getPartsCleaningThreadPool().initialize( cleanup_threads, 0, // We don't need any threads one all the parts will be deleted @@ -201,10 +212,10 @@ void LocalServer::tryInitPath() { std::string path; - if (config().has("path")) + if (getClientConfiguration().has("path")) { // User-supplied path. 
- path = config().getString("path"); + path = getClientConfiguration().getString("path"); Poco::trimInPlace(path); if (path.empty()) @@ -263,13 +274,13 @@ void LocalServer::tryInitPath() global_context->setUserFilesPath(""); /// user's files are everywhere - std::string user_scripts_path = config().getString("user_scripts_path", fs::path(path) / "user_scripts/"); + std::string user_scripts_path = getClientConfiguration().getString("user_scripts_path", fs::path(path) / "user_scripts/"); global_context->setUserScriptsPath(user_scripts_path); /// top_level_domains_lists - const std::string & top_level_domains_path = config().getString("top_level_domains_path", fs::path(path) / "top_level_domains/"); + const std::string & top_level_domains_path = getClientConfiguration().getString("top_level_domains_path", fs::path(path) / "top_level_domains/"); if (!top_level_domains_path.empty()) - TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config()); + TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", getClientConfiguration()); } @@ -311,14 +322,14 @@ void LocalServer::cleanup() std::string LocalServer::getInitialCreateTableQuery() { - if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!isRegularFile(STDIN_FILENO) || queries.empty())) + if (!getClientConfiguration().has("table-structure") && !getClientConfiguration().has("table-file") && !getClientConfiguration().has("table-data-format") && (!isRegularFile(STDIN_FILENO) || queries.empty())) return {}; - auto table_name = backQuoteIfNeed(config().getString("table-name", "table")); - auto table_structure = config().getString("table-structure", "auto"); + auto table_name = backQuoteIfNeed(getClientConfiguration().getString("table-name", "table")); + auto table_structure = getClientConfiguration().getString("table-structure", "auto"); String table_file; - if (!config().has("table-file") || 
config().getString("table-file") == "-") + if (!getClientConfiguration().has("table-file") || getClientConfiguration().getString("table-file") == "-") { /// Use Unix tools stdin naming convention table_file = "stdin"; @@ -326,7 +337,7 @@ std::string LocalServer::getInitialCreateTableQuery() else { /// Use regular file - auto file_name = config().getString("table-file"); + auto file_name = getClientConfiguration().getString("table-file"); table_file = quoteString(file_name); } @@ -374,18 +385,18 @@ void LocalServer::setupUsers() ConfigurationPtr users_config; auto & access_control = global_context->getAccessControl(); - access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true)); - access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true)); - if (config().has("config-file") || fs::exists("config.xml")) + access_control.setNoPasswordAllowed(getClientConfiguration().getBool("allow_no_password", true)); + access_control.setPlaintextPasswordAllowed(getClientConfiguration().getBool("allow_plaintext_password", true)); + if (getClientConfiguration().has("config-file") || fs::exists("config.xml")) { - String config_path = config().getString("config-file", ""); - bool has_user_directories = config().has("user_directories"); + String config_path = getClientConfiguration().getString("config-file", ""); + bool has_user_directories = getClientConfiguration().has("user_directories"); const auto config_dir = fs::path{config_path}.remove_filename().string(); - String users_config_path = config().getString("users_config", ""); + String users_config_path = getClientConfiguration().getString("users_config", ""); if (users_config_path.empty() && has_user_directories) { - users_config_path = config().getString("user_directories.users_xml.path"); + users_config_path = getClientConfiguration().getString("user_directories.users_xml.path"); if (fs::path(users_config_path).is_relative() && fs::exists(fs::path(config_dir) / 
users_config_path)) users_config_path = fs::path(config_dir) / users_config_path; } @@ -409,10 +420,10 @@ void LocalServer::setupUsers() void LocalServer::connect() { - connection_parameters = ConnectionParameters(config(), "localhost"); + connection_parameters = ConnectionParameters(getClientConfiguration(), "localhost"); ReadBuffer * in; - auto table_file = config().getString("table-file", "-"); + auto table_file = getClientConfiguration().getString("table-file", "-"); if (table_file == "-" || table_file == "stdin") { in = &std_in; @@ -433,7 +444,7 @@ try UseSSL use_ssl; thread_status.emplace(); - StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true)); + StackTrace::setShowAddresses(server_settings.show_addresses_in_stack_traces); setupSignalHandler(); @@ -448,7 +459,7 @@ try if (rlim.rlim_cur < rlim.rlim_max) { - rlim.rlim_cur = config().getUInt("max_open_files", static_cast(rlim.rlim_max)); + rlim.rlim_cur = getClientConfiguration().getUInt("max_open_files", static_cast(rlim.rlim_max)); int rc = setrlimit(RLIMIT_NOFILE, &rlim); if (rc != 0) std::cerr << fmt::format("Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. 
error: {}", rlim.rlim_cur, errnoToString()) << '\n'; @@ -456,8 +467,8 @@ try } is_interactive = stdin_is_a_tty - && (config().hasOption("interactive") - || (queries.empty() && !config().has("table-structure") && queries_files.empty() && !config().has("table-file"))); + && (getClientConfiguration().hasOption("interactive") + || (queries.empty() && !getClientConfiguration().has("table-structure") && queries_files.empty() && !getClientConfiguration().has("table-file"))); if (!is_interactive) { @@ -481,7 +492,7 @@ try SCOPE_EXIT({ cleanup(); }); - initTTYBuffer(toProgressOption(config().getString("progress", "default"))); + initTTYBuffer(toProgressOption(getClientConfiguration().getString("progress", "default"))); ASTAlterCommand::setFormatAlterCommandsWithParentheses(true); applyCmdSettings(global_context); @@ -489,7 +500,7 @@ try /// try to load user defined executable functions, throw on error and die try { - global_context->loadOrReloadUserDefinedExecutableFunctions(config()); + global_context->loadOrReloadUserDefinedExecutableFunctions(getClientConfiguration()); } catch (...) { @@ -530,7 +541,7 @@ try } catch (const DB::Exception & e) { - bool need_print_stack_trace = config().getBool("stacktrace", false); + bool need_print_stack_trace = getClientConfiguration().getBool("stacktrace", false); std::cerr << getExceptionMessage(e, need_print_stack_trace, true) << std::endl; return e.code() ? e.code() : -1; } @@ -542,42 +553,42 @@ catch (...) 
void LocalServer::updateLoggerLevel(const String & logs_level) { - config().setString("logger.level", logs_level); - updateLevels(config(), logger()); + getClientConfiguration().setString("logger.level", logs_level); + updateLevels(getClientConfiguration(), logger()); } void LocalServer::processConfig() { - if (!queries.empty() && config().has("queries-file")) + if (!queries.empty() && getClientConfiguration().has("queries-file")) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time"); - if (config().has("multiquery")) + if (getClientConfiguration().has("multiquery")) is_multiquery = true; - pager = config().getString("pager", ""); + pager = getClientConfiguration().getString("pager", ""); - delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file")); + delayed_interactive = getClientConfiguration().has("interactive") && (!queries.empty() || getClientConfiguration().has("queries-file")); if (!is_interactive || delayed_interactive) { - echo_queries = config().hasOption("echo") || config().hasOption("verbose"); - ignore_error = config().getBool("ignore-error", false); + echo_queries = getClientConfiguration().hasOption("echo") || getClientConfiguration().hasOption("verbose"); + ignore_error = getClientConfiguration().getBool("ignore-error", false); } - print_stack_trace = config().getBool("stacktrace", false); + print_stack_trace = getClientConfiguration().getBool("stacktrace", false); const std::string clickhouse_dialect{"clickhouse"}; - load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false) - && config().getString("dialect", clickhouse_dialect) == clickhouse_dialect; - wait_for_suggestions_to_load = config().getBool("wait_for_suggestions_to_load", false); + load_suggestions = (is_interactive || delayed_interactive) && !getClientConfiguration().getBool("disable_suggestion", false) + && 
getClientConfiguration().getString("dialect", clickhouse_dialect) == clickhouse_dialect; + wait_for_suggestions_to_load = getClientConfiguration().getBool("wait_for_suggestions_to_load", false); - auto logging = (config().has("logger.console") - || config().has("logger.level") - || config().has("log-level") - || config().has("send_logs_level") - || config().has("logger.log")); + auto logging = (getClientConfiguration().has("logger.console") + || getClientConfiguration().has("logger.level") + || getClientConfiguration().has("log-level") + || getClientConfiguration().has("send_logs_level") + || getClientConfiguration().has("logger.log")); - auto level = config().getString("log-level", "trace"); + auto level = getClientConfiguration().getString("log-level", "trace"); - if (config().has("server_logs_file")) + if (getClientConfiguration().has("server_logs_file")) { auto poco_logs_level = Poco::Logger::parseLevel(level); Poco::Logger::root().setLevel(poco_logs_level); @@ -587,10 +598,10 @@ void LocalServer::processConfig() } else { - config().setString("logger", "logger"); + getClientConfiguration().setString("logger", "logger"); auto log_level_default = logging ? 
level : "fatal"; - config().setString("logger.level", config().getString("log-level", config().getString("send_logs_level", log_level_default))); - buildLoggers(config(), logger(), "clickhouse-local"); + getClientConfiguration().setString("logger.level", getClientConfiguration().getString("log-level", getClientConfiguration().getString("send_logs_level", log_level_default))); + buildLoggers(getClientConfiguration(), logger(), "clickhouse-local"); } shared_context = Context::createShared(); @@ -604,13 +615,13 @@ void LocalServer::processConfig() LoggerRawPtr log = &logger(); /// Maybe useless - if (config().has("macros")) - global_context->setMacros(std::make_unique(config(), "macros", log)); + if (getClientConfiguration().has("macros")) + global_context->setMacros(std::make_unique(getClientConfiguration(), "macros", log)); setDefaultFormatsAndCompressionFromConfiguration(); /// Sets external authenticators config (LDAP, Kerberos). - global_context->setExternalAuthenticatorsConfig(config()); + global_context->setExternalAuthenticatorsConfig(getClientConfiguration()); setupUsers(); @@ -619,12 +630,43 @@ void LocalServer::processConfig() global_context->getProcessList().setMaxSize(0); const size_t physical_server_memory = getMemoryAmount(); - const double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5); + + size_t max_server_memory_usage = server_settings.max_server_memory_usage; + double max_server_memory_usage_to_ram_ratio = server_settings.max_server_memory_usage_to_ram_ratio; + + size_t default_max_server_memory_usage = static_cast(physical_server_memory * max_server_memory_usage_to_ram_ratio); + + if (max_server_memory_usage == 0) + { + max_server_memory_usage = default_max_server_memory_usage; + LOG_INFO(log, "Setting max_server_memory_usage was set to {}" + " ({} available * {:.2f} max_server_memory_usage_to_ram_ratio)", + formatReadableSizeWithBinarySuffix(max_server_memory_usage), + 
formatReadableSizeWithBinarySuffix(physical_server_memory), + max_server_memory_usage_to_ram_ratio); + } + else if (max_server_memory_usage > default_max_server_memory_usage) + { + max_server_memory_usage = default_max_server_memory_usage; + LOG_INFO(log, "Setting max_server_memory_usage was lowered to {}" + " because the system has low amount of memory. The amount was" + " calculated as {} available" + " * {:.2f} max_server_memory_usage_to_ram_ratio", + formatReadableSizeWithBinarySuffix(max_server_memory_usage), + formatReadableSizeWithBinarySuffix(physical_server_memory), + max_server_memory_usage_to_ram_ratio); + } + + total_memory_tracker.setHardLimit(max_server_memory_usage); + total_memory_tracker.setDescription("(total)"); + total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); + + const double cache_size_to_ram_max_ratio = server_settings.cache_size_to_ram_max_ratio; const size_t max_cache_size = static_cast(physical_server_memory * cache_size_to_ram_max_ratio); - String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY); - size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE); - double uncompressed_cache_size_ratio = config().getDouble("uncompressed_cache_size_ratio", DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO); + String uncompressed_cache_policy = server_settings.uncompressed_cache_policy; + size_t uncompressed_cache_size = server_settings.uncompressed_cache_size; + double uncompressed_cache_size_ratio = server_settings.uncompressed_cache_size_ratio; if (uncompressed_cache_size > max_cache_size) { uncompressed_cache_size = max_cache_size; @@ -632,9 +674,9 @@ void LocalServer::processConfig() } global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio); - String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY); - size_t 
mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE); - double mark_cache_size_ratio = config().getDouble("mark_cache_size_ratio", DEFAULT_MARK_CACHE_SIZE_RATIO); + String mark_cache_policy = server_settings.mark_cache_policy; + size_t mark_cache_size = server_settings.mark_cache_size; + double mark_cache_size_ratio = server_settings.mark_cache_size_ratio; if (!mark_cache_size) LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation."); if (mark_cache_size > max_cache_size) @@ -644,9 +686,9 @@ void LocalServer::processConfig() } global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio); - String index_uncompressed_cache_policy = config().getString("index_uncompressed_cache_policy", DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY); - size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE); - double index_uncompressed_cache_size_ratio = config().getDouble("index_uncompressed_cache_size_ratio", DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO); + String index_uncompressed_cache_policy = server_settings.index_uncompressed_cache_policy; + size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size; + double index_uncompressed_cache_size_ratio = server_settings.index_uncompressed_cache_size_ratio; if (index_uncompressed_cache_size > max_cache_size) { index_uncompressed_cache_size = max_cache_size; @@ -654,9 +696,9 @@ void LocalServer::processConfig() } global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio); - String index_mark_cache_policy = config().getString("index_mark_cache_policy", DEFAULT_INDEX_MARK_CACHE_POLICY); - size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE); - double index_mark_cache_size_ratio = 
config().getDouble("index_mark_cache_size_ratio", DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO); + String index_mark_cache_policy = server_settings.index_mark_cache_policy; + size_t index_mark_cache_size = server_settings.index_mark_cache_size; + double index_mark_cache_size_ratio = server_settings.index_mark_cache_size_ratio; if (index_mark_cache_size > max_cache_size) { index_mark_cache_size = max_cache_size; @@ -664,7 +706,7 @@ void LocalServer::processConfig() } global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio); - size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE); + size_t mmap_cache_size = server_settings.mmap_cache_size; if (mmap_cache_size > max_cache_size) { mmap_cache_size = max_cache_size; @@ -676,8 +718,8 @@ void LocalServer::processConfig() global_context->setQueryCache(0, 0, 0, 0); #if USE_EMBEDDED_COMPILER - size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE); - size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES); + size_t compiled_expression_cache_max_size_in_bytes = server_settings.compiled_expression_cache_size; + size_t compiled_expression_cache_max_elements = server_settings.compiled_expression_cache_elements_size; CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements); #endif @@ -689,16 +731,16 @@ void LocalServer::processConfig() applyCmdOptions(global_context); /// Load global settings from default_profile and system_profile. - global_context->setDefaultProfiles(config()); + global_context->setDefaultProfiles(getClientConfiguration()); /// We load temporary database first, because projections need it. 
DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase(); - std::string default_database = config().getString("default_database", "default"); + std::string default_database = server_settings.default_database; DatabaseCatalog::instance().attachDatabase(default_database, createClickHouseLocalDatabaseOverlay(default_database, global_context)); global_context->setCurrentDatabase(default_database); - if (config().has("path")) + if (getClientConfiguration().has("path")) { String path = global_context->getPath(); fs::create_directories(fs::path(path)); @@ -713,7 +755,7 @@ void LocalServer::processConfig() attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); waitLoad(TablesLoaderForegroundPoolId, startup_system_tasks); - if (!config().has("only-system-tables")) + if (!getClientConfiguration().has("only-system-tables")) { DatabaseCatalog::instance().createBackgroundTasks(); waitLoad(loadMetadata(global_context)); @@ -725,18 +767,15 @@ void LocalServer::processConfig() LOG_DEBUG(log, "Loaded metadata."); } - else if (!config().has("no-system-tables")) + else if (!getClientConfiguration().has("no-system-tables")) { attachSystemTablesServer(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE), false); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA)); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); } - server_display_name = config().getString("display_name", getFQDNOrHostName()); - prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", "{display_name} :) "); - std::map prompt_substitutions{{"display_name", server_display_name}}; - for (const auto & [key, value] : prompt_substitutions) - boost::replace_all(prompt_by_server_display_name, 
"{" + key + "}", value); + server_display_name = getClientConfiguration().getString("display_name", ""); + prompt_by_server_display_name = getClientConfiguration().getRawString("prompt_by_server_display_name.default", ":) "); global_context->setQueryKindInitial(); global_context->setQueryKind(query_kind); @@ -814,7 +853,7 @@ void LocalServer::applyCmdSettings(ContextMutablePtr context) void LocalServer::applyCmdOptions(ContextMutablePtr context) { - context->setDefaultFormat(config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV"))); + context->setDefaultFormat(getClientConfiguration().getString("output-format", getClientConfiguration().getString("format", is_interactive ? "PrettyCompact" : "TSV"))); applyCmdSettings(context); } @@ -822,33 +861,33 @@ void LocalServer::applyCmdOptions(ContextMutablePtr context) void LocalServer::processOptions(const OptionsDescription &, const CommandLineOptions & options, const std::vector &, const std::vector &) { if (options.count("table")) - config().setString("table-name", options["table"].as()); + getClientConfiguration().setString("table-name", options["table"].as()); if (options.count("file")) - config().setString("table-file", options["file"].as()); + getClientConfiguration().setString("table-file", options["file"].as()); if (options.count("structure")) - config().setString("table-structure", options["structure"].as()); + getClientConfiguration().setString("table-structure", options["structure"].as()); if (options.count("no-system-tables")) - config().setBool("no-system-tables", true); + getClientConfiguration().setBool("no-system-tables", true); if (options.count("only-system-tables")) - config().setBool("only-system-tables", true); + getClientConfiguration().setBool("only-system-tables", true); if (options.count("database")) - config().setString("default_database", options["database"].as()); + getClientConfiguration().setString("default_database", 
options["database"].as()); if (options.count("input-format")) - config().setString("table-data-format", options["input-format"].as()); + getClientConfiguration().setString("table-data-format", options["input-format"].as()); if (options.count("output-format")) - config().setString("output-format", options["output-format"].as()); + getClientConfiguration().setString("output-format", options["output-format"].as()); if (options.count("logger.console")) - config().setBool("logger.console", options["logger.console"].as()); + getClientConfiguration().setBool("logger.console", options["logger.console"].as()); if (options.count("logger.log")) - config().setString("logger.log", options["logger.log"].as()); + getClientConfiguration().setString("logger.log", options["logger.log"].as()); if (options.count("logger.level")) - config().setString("logger.level", options["logger.level"].as()); + getClientConfiguration().setString("logger.level", options["logger.level"].as()); if (options.count("send_logs_level")) - config().setString("send_logs_level", options["send_logs_level"].as()); + getClientConfiguration().setString("send_logs_level", options["send_logs_level"].as()); if (options.count("wait_for_suggestions_to_load")) - config().setBool("wait_for_suggestions_to_load", true); + getClientConfiguration().setBool("wait_for_suggestions_to_load", true); } void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector &, std::vector &) diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index 4856e68ff9b..da2466650a7 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -30,6 +30,9 @@ public: int main(const std::vector & /*args*/) override; protected: + + Poco::Util::LayeredConfiguration & getClientConfiguration() override; + void connect() override; void processError(const String & query) const override; @@ -63,6 +66,8 @@ private: void applyCmdOptions(ContextMutablePtr context); void 
applyCmdSettings(ContextMutablePtr context); + ServerSettings server_settings; + std::optional status; std::optional temporary_directory_to_delete; diff --git a/programs/main.cpp b/programs/main.cpp index c270388f17f..61e2bc18ed7 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -13,6 +13,7 @@ #include +#include "config.h" #include "config_tools.h" #include @@ -439,6 +440,14 @@ extern "C" } #endif +/// Prevent messages from JeMalloc in the release build. +/// Some of these messages are non-actionable for the users, such as: +/// : Number of CPUs detected is not deterministic. Per-CPU arena disabled. +#if USE_JEMALLOC && defined(NDEBUG) && !defined(SANITIZER) +extern "C" void (*malloc_message)(void *, const char *s); +__attribute__((constructor(0))) void init_je_malloc_message() { malloc_message = [](void *, const char *){}; } +#endif + /// This allows to implement assert to forbid initialization of a class in static constructors. /// Usage: /// diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 83839cc21ac..14af330f788 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -13,7 +13,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES getIdentifierQuote.cpp odbc-bridge.cpp validateODBCConnectionString.cpp - createFunctionBaseCast.cpp ) clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) @@ -25,6 +24,7 @@ target_link_libraries(clickhouse-odbc-bridge PRIVATE clickhouse_parsers ch_contrib::nanodbc ch_contrib::unixodbc + clickhouse_functions ) set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) 
diff --git a/programs/odbc-bridge/ODBCSource.cpp b/programs/odbc-bridge/ODBCSource.cpp index 940970f36ab..41a9813ce50 100644 --- a/programs/odbc-bridge/ODBCSource.cpp +++ b/programs/odbc-bridge/ODBCSource.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -47,9 +48,17 @@ Chunk ODBCSource::generate() for (int idx = 0; idx < result.columns(); ++idx) { const auto & sample = description.sample_block.getByPosition(idx); - if (!result.is_null(idx)) - insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx); + { + if (columns[idx]->isNullable()) + { + ColumnNullable & column_nullable = assert_cast(*columns[idx]); + insertValue(column_nullable.getNestedColumn(), removeNullable(sample.type), description.types[idx].first, result, idx); + column_nullable.getNullMapData().emplace_back(0); + } + else + insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx); + } else insertDefaultValue(*columns[idx], *sample.column); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8fcb9d87a93..4cb3b5f45c7 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -70,7 +72,6 @@ #include #include #include -#include #include #include #include @@ -721,11 +722,6 @@ try CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); - Poco::ThreadPool server_pool(3, server_settings.max_connections); - std::mutex servers_lock; - std::vector servers; - std::vector servers_to_start_before_tables; - /** Context contains all that query execution is dependent: * settings, available functions, data types, aggregate functions, databases, ... 
*/ @@ -773,7 +769,27 @@ try LOG_INFO(log, "Available CPU instruction sets: {}", cpu_info); #endif - bool will_have_trace_collector = hasPHDRCache() && config().has("trace_log"); + bool has_trace_collector = false; + /// Disable it if we collect test coverage information, because it will work extremely slow. +#if !WITH_COVERAGE + /// Profilers cannot work reliably with any other libunwind or without PHDR cache. + has_trace_collector = hasPHDRCache() && config().has("trace_log"); +#endif + + /// Describe multiple reasons when query profiler cannot work. + +#if WITH_COVERAGE + LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they work extremely slow with test coverage."); +#endif + +#if defined(SANITIZER) + LOG_INFO(log, "Query Profiler disabled because they cannot work under sanitizers" + " when two different stack unwinding methods will interfere with each other."); +#endif + + if (!hasPHDRCache()) + LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they require PHDR cache to be created" + " (otherwise the function 'dl_iterate_phdr' is not lock free and not async-signal safe)."); // Initialize global thread pool. Do it before we fetch configs from zookeeper // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will @@ -782,8 +798,39 @@ try server_settings.max_thread_pool_size, server_settings.max_thread_pool_free_size, server_settings.thread_pool_queue_size, - will_have_trace_collector ? server_settings.global_profiler_real_time_period_ns : 0, - will_have_trace_collector ? server_settings.global_profiler_cpu_time_period_ns : 0); + has_trace_collector ? server_settings.global_profiler_real_time_period_ns : 0, + has_trace_collector ? server_settings.global_profiler_cpu_time_period_ns : 0); + + if (has_trace_collector) + { + global_context->createTraceCollector(); + + /// Set up server-wide memory profiler (for total memory tracker). 
+ if (server_settings.total_memory_profiler_step) + total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step); + + if (server_settings.total_memory_tracker_sample_probability > 0.0) + total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability); + + if (server_settings.total_memory_profiler_sample_min_allocation_size) + total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size); + + if (server_settings.total_memory_profiler_sample_max_allocation_size) + total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size); + } + + Poco::ThreadPool server_pool( + /* minCapacity */3, + /* maxCapacity */server_settings.max_connections, + /* idleTime */60, + /* stackSize */POCO_THREAD_STACK_SIZE, + server_settings.global_profiler_real_time_period_ns, + server_settings.global_profiler_cpu_time_period_ns); + + std::mutex servers_lock; + std::vector servers; + std::vector servers_to_start_before_tables; + /// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed). SCOPE_EXIT({ Stopwatch watch; @@ -944,6 +991,20 @@ try } } + std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)); + fs::path path = path_str; + + /// Check that the process user id matches the owner of the data. 
+ assertProcessUserMatchesDataOwner(path_str, [&](const std::string & message){ global_context->addWarningMessage(message); }); + + global_context->setPath(path_str); + + StatusFile status{path / "status", StatusFile::write_full_info}; + + ServerUUID::load(path / "uuid", log); + + PlacementInfo::PlacementInfo::instance().initialize(config()); + zkutil::validateZooKeeperConfig(config()); bool has_zookeeper = zkutil::hasZooKeeperConfig(config()); @@ -955,7 +1016,7 @@ try ConfigProcessor config_processor(config_path); loaded_config = config_processor.loadConfigWithZooKeeperIncludes( main_config_zk_node_cache, main_config_zk_changed_event, /* fallback_to_preprocessed = */ true); - config_processor.savePreprocessedConfig(loaded_config, config().getString("path", DBMS_DEFAULT_PATH)); + config_processor.savePreprocessedConfig(loaded_config, path_str); config().removeConfiguration(old_configuration.get()); config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); global_context->setConfig(loaded_config.configuration); @@ -1089,19 +1150,6 @@ try global_context->setRemoteHostFilter(config()); global_context->setHTTPHeaderFilter(config()); - std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)); - fs::path path = path_str; - std::string default_database = server_settings.default_database.toString(); - - /// Check that the process user id matches the owner of the data. - assertProcessUserMatchesDataOwner(path_str, [&](const std::string & message){ global_context->addWarningMessage(message); }); - - global_context->setPath(path_str); - - StatusFile status{path / "status", StatusFile::write_full_info}; - - ServerUUID::load(path / "uuid", log); - /// Try to increase limit on number of open files. 
{ rlimit rlim; @@ -1334,12 +1382,12 @@ try global_context->setQueryCache(query_cache_max_size_in_bytes, query_cache_max_entries, query_cache_query_cache_max_entry_size_in_bytes, query_cache_max_entry_size_in_rows); #if USE_EMBEDDED_COMPILER - size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE); - size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES); + size_t compiled_expression_cache_max_size_in_bytes = server_settings.compiled_expression_cache_size; + size_t compiled_expression_cache_max_elements = server_settings.compiled_expression_cache_elements_size; CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements); #endif - NamedCollectionUtils::loadIfNot(); + NamedCollectionFactory::instance().loadIfNot(); /// Initialize main config reloader. std::string include_from_path = config().getString("include_from", "/etc/metrika.xml"); @@ -1361,8 +1409,8 @@ try tryLogCurrentException(log, "Disabling cgroup memory observer because of an error during initialization"); } - const std::string cert_path = config().getString("openSSL.server.certificateFile", ""); - const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); + std::string cert_path = config().getString("openSSL.server.certificateFile", ""); + std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); std::vector extra_paths = {include_from_path}; if (!cert_path.empty()) @@ -1370,6 +1418,18 @@ try if (!key_path.empty()) extra_paths.emplace_back(key_path); + Poco::Util::AbstractConfiguration::Keys protocols; + config().keys("protocols", protocols); + for (const auto & protocol : protocols) + { + cert_path = config().getString("protocols." 
+ protocol + ".certificateFile", ""); + key_path = config().getString("protocols." + protocol + ".privateKeyFile", ""); + if (!cert_path.empty()) + extra_paths.emplace_back(cert_path); + if (!key_path.empty()) + extra_paths.emplace_back(key_path); + } + auto main_config_reloader = std::make_unique( config_path, extra_paths, @@ -1482,6 +1542,8 @@ try global_context->setMaxDictionaryNumToWarn(new_server_settings.max_dictionary_num_to_warn); global_context->setMaxDatabaseNumToWarn(new_server_settings.max_database_num_to_warn); global_context->setMaxPartNumToWarn(new_server_settings.max_part_num_to_warn); + /// Only for system.server_settings + global_context->setConfigReloaderInterval(new_server_settings.config_reload_interval_ms); SlotCount concurrent_threads_soft_limit = UnlimitedSlots; if (new_server_settings.concurrent_threads_soft_limit_num > 0 && new_server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit) @@ -1571,6 +1633,10 @@ try 0, // We don't need any threads one all the parts will be deleted new_server_settings.max_parts_cleaning_thread_pool_size); + + global_context->setMergeWorkload(new_server_settings.merge_workload); + global_context->setMutationWorkload(new_server_settings.mutation_workload); + if (config->has("resources")) { global_context->getResourceManager()->updateConfiguration(*config); @@ -1606,9 +1672,9 @@ try CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs"); #if USE_SSL - CertificateReloader::instance().tryLoad(*config); + CertificateReloader::instance().tryReloadAll(*config); #endif - NamedCollectionUtils::reloadFromConfig(*config); + NamedCollectionFactory::instance().reloadFromConfig(*config); FileCacheFactory::instance().updateSettingsFromConfig(*config); @@ -1632,12 +1698,15 @@ try if (global_context->isServerCompletelyStarted()) CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability); +#if 
USE_GWP_ASAN + GWPAsan::setForceSampleProbability(new_server_settings.gwp_asan_force_sample_probability); +#endif + ProfileEvents::increment(ProfileEvents::MainConfigLoads); /// Must be the last. latest_config = config; - }, - /* already_loaded = */ false); /// Reload it right now (initial loading) + }); const auto listen_hosts = getListenHosts(config()); const auto interserver_listen_hosts = getInterserverListenHosts(config()); @@ -1750,11 +1819,6 @@ try } - if (config().has(DB::PlacementInfo::PLACEMENT_CONFIG_PREFIX)) - { - PlacementInfo::PlacementInfo::instance().initialize(config()); - } - { std::lock_guard lock(servers_lock); /// We should start interserver communications before (and more important shutdown after) tables. @@ -1889,6 +1953,7 @@ try /// Set current database name before loading tables and databases because /// system logs may copy global context. + std::string default_database = server_settings.default_database.toString(); global_context->setCurrentDatabaseNameInGlobalContext(default_database); LOG_INFO(log, "Loading metadata from {}", path_str); @@ -1950,52 +2015,9 @@ try LOG_DEBUG(log, "Loaded metadata."); - /// Init trace collector only after trace_log system table was created - /// Disable it if we collect test coverage information, because it will work extremely slow. -#if !WITH_COVERAGE - /// Profilers cannot work reliably with any other libunwind or without PHDR cache. - if (hasPHDRCache()) - { + if (has_trace_collector) global_context->initializeTraceCollector(); - /// Set up server-wide memory profiler (for total memory tracker). 
- if (server_settings.total_memory_profiler_step) - { - total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step); - } - - if (server_settings.total_memory_tracker_sample_probability > 0.0) - { - total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability); - } - - if (server_settings.total_memory_profiler_sample_min_allocation_size) - { - total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size); - } - - if (server_settings.total_memory_profiler_sample_max_allocation_size) - { - total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size); - } - } -#endif - - /// Describe multiple reasons when query profiler cannot work. - -#if WITH_COVERAGE - LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they work extremely slow with test coverage."); -#endif - -#if defined(SANITIZER) - LOG_INFO(log, "Query Profiler disabled because they cannot work under sanitizers" - " when two different stack unwinding methods will interfere with each other."); -#endif - - if (!hasPHDRCache()) - LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they require PHDR cache to be created" - " (otherwise the function 'dl_iterate_phdr' is not lock free and not async-signal safe)."); - #if defined(OS_LINUX) auto tasks_stats_provider = TasksStatsCounters::findBestAvailableProvider(); if (tasks_stats_provider == TasksStatsCounters::MetricsProvider::None) @@ -2124,6 +2146,10 @@ try CannotAllocateThreadFaultInjector::setFaultProbability(server_settings.cannot_allocate_thread_fault_injection_probability); +#if USE_GWP_ASAN + GWPAsan::setForceSampleProbability(server_settings.gwp_asan_force_sample_probability); +#endif + try { global_context->startClusterDiscovery(); diff --git a/programs/server/config.xml b/programs/server/config.xml index 4b3248d9d1c..94825a55f67 100644 --- 
a/programs/server/config.xml +++ b/programs/server/config.xml @@ -29,7 +29,14 @@ --> 1000M 10 + + + + + + + - + true @@ -408,13 +415,11 @@ - 5368709120 + You should not lower this value. --> + - - 5368709120 + + - 1000 + - 134217728 + - 10000 + + + + /var/lib/clickhouse/caches/ @@ -1155,6 +1170,18 @@ false + + + system + error_log
+ 7500 + 1048576 + 8192 + 524288 + 1000 + false +
+ + + + - - - 1073741824 - 1024 - 1048576 - 30000000 - - backups diff --git a/programs/server/config.yaml.example b/programs/server/config.yaml.example index 9fc188e97aa..5d5499f876c 100644 --- a/programs/server/config.yaml.example +++ b/programs/server/config.yaml.example @@ -260,7 +260,10 @@ uncompressed_cache_size: 8589934592 # Approximate size of mark cache, used in tables of MergeTree family. # In bytes. Cache is single for server. Memory is allocated only on demand. # You should not lower this value. -mark_cache_size: 5368709120 +# mark_cache_size: 5368709120 + +# For marks of secondary indices. +# index_mark_cache_size: 5368709120 # If you enable the `min_bytes_to_use_mmap_io` setting, # the data in MergeTree tables can be read with mmap to avoid copying from kernel to userspace. @@ -277,13 +280,20 @@ mark_cache_size: 5368709120 # in query or server memory usage - because this memory can be discarded similar to OS page cache. # The cache is dropped (the files are closed) automatically on removal of old parts in MergeTree, # also it can be dropped manually by the SYSTEM DROP MMAP CACHE query. -mmap_cache_size: 1000 +# mmap_cache_size: 1024 # Cache size in bytes for compiled expressions. -compiled_expression_cache_size: 134217728 +# compiled_expression_cache_size: 134217728 # Cache size in elements for compiled expressions. -compiled_expression_cache_elements_size: 10000 +# compiled_expression_cache_elements_size: 10000 + +# Configuration for the query cache +# query_cache: +# max_size_in_bytes: 1073741824 +# max_entries: 1024 +# max_entry_size_in_bytes: 1048576 +# max_entry_size_in_rows: 30000000 # Path to data directory, with trailing slash. path: /var/lib/clickhouse/ @@ -726,6 +736,13 @@ metric_log: flush_interval_milliseconds: 7500 collect_interval_milliseconds: 1000 +# Error log contains rows with current values of errors collected with "collect_interval_milliseconds" interval. 
+error_log: + database: system + table: error_log + flush_interval_milliseconds: 7500 + collect_interval_milliseconds: 1000 + # Asynchronous metric log contains values of metrics from # system.asynchronous_metrics. asynchronous_metric_log: diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index c3bb42160ad..353358fac65 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -261,7 +261,24 @@ AccessControl::AccessControl() } -AccessControl::~AccessControl() = default; +AccessControl::~AccessControl() +{ + try + { + AccessControl::shutdown(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + + +void AccessControl::shutdown() +{ + MultipleAccessStorage::shutdown(); + removeAllStorages(); +} void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index d1537219a06..bfaf256ad48 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -53,6 +53,9 @@ public: AccessControl(); ~AccessControl() override; + /// Shutdown the access control and stops all background activity. + void shutdown() override; + /// Initializes access storage (user directories). 
void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_); diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index bf1fe3feec3..6b9a6e05cf6 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include "config.h" @@ -108,6 +109,9 @@ bool Authentication::areCredentialsValid( case AuthenticationType::HTTP: throw Authentication::Require("ClickHouse Basic Authentication"); + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + case AuthenticationType::KERBEROS: return external_authenticators.checkKerberosCredentials(auth_data.getKerberosRealm(), *gss_acceptor_context); @@ -149,6 +153,9 @@ bool Authentication::areCredentialsValid( case AuthenticationType::SSL_CERTIFICATE: throw Authentication::Require("ClickHouse X.509 Authentication"); + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + case AuthenticationType::SSH_KEY: #if USE_SSH throw Authentication::Require("SSH Keys Authentication"); @@ -193,6 +200,9 @@ bool Authentication::areCredentialsValid( throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + case AuthenticationType::BCRYPT_PASSWORD: return checkPasswordBcrypt(basic_credentials->getPassword(), auth_data.getPasswordHashBinary()); @@ -222,11 +232,22 @@ bool Authentication::areCredentialsValid( case AuthenticationType::HTTP: throw Authentication::Require("ClickHouse Basic Authentication"); + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); 
+ case AuthenticationType::KERBEROS: throw Authentication::Require(auth_data.getKerberosRealm()); case AuthenticationType::SSL_CERTIFICATE: - return auth_data.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName()); + for (SSLCertificateSubjects::Type type : {SSLCertificateSubjects::Type::CN, SSLCertificateSubjects::Type::SAN}) + { + for (const auto & subject : auth_data.getSSLCertificateSubjects().at(type)) + { + if (ssl_certificate_credentials->getSSLCertificateSubjects().at(type).contains(subject)) + return true; + } + } + return false; case AuthenticationType::SSH_KEY: #if USE_SSH @@ -254,6 +275,9 @@ bool Authentication::areCredentialsValid( case AuthenticationType::HTTP: throw Authentication::Require("ClickHouse Basic Authentication"); + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + case AuthenticationType::KERBEROS: throw Authentication::Require(auth_data.getKerberosRealm()); diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp index a32215f3d92..5a35eeefe5b 100644 --- a/src/Access/AuthenticationData.cpp +++ b/src/Access/AuthenticationData.cpp @@ -15,6 +15,7 @@ #include #include +#include #include "config.h" #if USE_SSL @@ -31,6 +32,7 @@ namespace DB { namespace ErrorCodes { + extern const int AUTHENTICATION_FAILED; extern const int SUPPORT_IS_DISABLED; extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; @@ -90,8 +92,10 @@ bool AuthenticationData::Util::checkPasswordBcrypt(std::string_view password [[m { #if USE_BCRYPT int ret = bcrypt_checkpw(password.data(), reinterpret_cast(password_bcrypt.data())); + /// Before 24.6 we didn't validate hashes on creation, so it could be that the stored hash is invalid + /// and it could not be decoded by the library if (ret == -1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "BCrypt library failed: bcrypt_checkpw returned {}", ret); + throw 
Exception(ErrorCodes::AUTHENTICATION_FAILED, "Internal failure decoding Bcrypt hash"); return (ret == 0); #else throw Exception( @@ -104,7 +108,7 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs) { return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash) && (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm) - && (lhs.ssl_certificate_common_names == rhs.ssl_certificate_common_names) + && (lhs.ssl_certificate_subjects == rhs.ssl_certificate_subjects) #if USE_SSH && (lhs.ssh_keys == rhs.ssh_keys) #endif @@ -132,6 +136,7 @@ void AuthenticationData::setPassword(const String & password_) case AuthenticationType::BCRYPT_PASSWORD: case AuthenticationType::NO_PASSWORD: case AuthenticationType::LDAP: + case AuthenticationType::JWT: case AuthenticationType::KERBEROS: case AuthenticationType::SSL_CERTIFICATE: case AuthenticationType::SSH_KEY: @@ -230,6 +235,17 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Password hash for the 'BCRYPT_PASSWORD' authentication type has length {} " "but must be 59 or 60 bytes.", hash.size()); + + auto resized = hash; + resized.resize(64); + +#if USE_BCRYPT + /// Verify that it is a valid hash + int ret = bcrypt_checkpw("", reinterpret_cast(resized.data())); + if (ret == -1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Could not decode the provided hash with 'bcrypt_hash'"); +#endif + password_hash = hash; password_hash.resize(64); return; @@ -237,6 +253,7 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash) case AuthenticationType::NO_PASSWORD: case AuthenticationType::LDAP: + case AuthenticationType::JWT: case AuthenticationType::KERBEROS: case AuthenticationType::SSL_CERTIFICATE: case AuthenticationType::SSH_KEY: @@ -261,11 +278,16 @@ String AuthenticationData::getSalt() const return salt; } -void 
AuthenticationData::setSSLCertificateCommonNames(boost::container::flat_set common_names_) +void AuthenticationData::setSSLCertificateSubjects(SSLCertificateSubjects && ssl_certificate_subjects_) { - if (common_names_.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 'SSL CERTIFICATE' authentication type requires a non-empty list of common names."); - ssl_certificate_common_names = std::move(common_names_); + if (ssl_certificate_subjects_.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 'SSL CERTIFICATE' authentication type requires a non-empty list of subjects."); + ssl_certificate_subjects = std::move(ssl_certificate_subjects_); +} + +void AuthenticationData::addSSLCertificateSubject(SSLCertificateSubjects::Type type_, String && subject_) +{ + ssl_certificate_subjects.insert(type_, std::move(subject_)); } std::shared_ptr AuthenticationData::toAST() const @@ -308,6 +330,10 @@ std::shared_ptr AuthenticationData::toAST() const node->children.push_back(std::make_shared(getLDAPServerName())); break; } + case AuthenticationType::JWT: + { + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + } case AuthenticationType::KERBEROS: { const auto & realm = getKerberosRealm(); @@ -319,7 +345,14 @@ std::shared_ptr AuthenticationData::toAST() const } case AuthenticationType::SSL_CERTIFICATE: { - for (const auto & name : getSSLCertificateCommonNames()) + using SSLCertificateSubjects::Type::CN; + using SSLCertificateSubjects::Type::SAN; + + const auto &subjects = getSSLCertificateSubjects(); + SSLCertificateSubjects::Type cert_subject_type = !subjects.at(SAN).empty() ? 
SAN : CN; + + node->ssl_cert_subject_type = toString(cert_subject_type); + for (const auto & name : getSSLCertificateSubjects().at(cert_subject_type)) node->children.push_back(std::make_shared(name)); break; @@ -493,11 +526,9 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que } else if (query.type == AuthenticationType::SSL_CERTIFICATE) { - boost::container::flat_set common_names; + auto ssl_cert_subject_type = parseSSLCertificateSubjectType(*query.ssl_cert_subject_type); for (const auto & arg : args) - common_names.insert(checkAndGetLiteralArgument(arg, "common_name")); - - auth_data.setSSLCertificateCommonNames(std::move(common_names)); + auth_data.addSSLCertificateSubject(ssl_cert_subject_type, checkAndGetLiteralArgument(arg, "ssl_certificate_subject")); } else if (query.type == AuthenticationType::HTTP) { diff --git a/src/Access/AuthenticationData.h b/src/Access/AuthenticationData.h index c97e0327b56..8093fe1d888 100644 --- a/src/Access/AuthenticationData.h +++ b/src/Access/AuthenticationData.h @@ -2,13 +2,14 @@ #include #include +#include #include #include #include #include #include -#include + #include "config.h" @@ -58,8 +59,9 @@ public: const String & getKerberosRealm() const { return kerberos_realm; } void setKerberosRealm(const String & realm) { kerberos_realm = realm; } - const boost::container::flat_set & getSSLCertificateCommonNames() const { return ssl_certificate_common_names; } - void setSSLCertificateCommonNames(boost::container::flat_set common_names_); + const SSLCertificateSubjects & getSSLCertificateSubjects() const { return ssl_certificate_subjects; } + void setSSLCertificateSubjects(SSLCertificateSubjects && ssl_certificate_subjects_); + void addSSLCertificateSubject(SSLCertificateSubjects::Type type_, String && subject_); #if USE_SSH const std::vector & getSSHKeys() const { return ssh_keys; } @@ -96,7 +98,7 @@ private: Digest password_hash; String ldap_server_name; String kerberos_realm; - 
boost::container::flat_set ssl_certificate_common_names; + SSLCertificateSubjects ssl_certificate_subjects; String salt; #if USE_SSH std::vector ssh_keys; diff --git a/src/Access/CachedAccessChecking.cpp b/src/Access/CachedAccessChecking.cpp index aa8ef6073d3..0d629e7b77a 100644 --- a/src/Access/CachedAccessChecking.cpp +++ b/src/Access/CachedAccessChecking.cpp @@ -4,12 +4,12 @@ namespace DB { -CachedAccessChecking::CachedAccessChecking(const std::shared_ptr & access_, AccessFlags access_flags_) +CachedAccessChecking::CachedAccessChecking(const std::shared_ptr & access_, AccessFlags access_flags_) : CachedAccessChecking(access_, AccessRightsElement{access_flags_}) { } -CachedAccessChecking::CachedAccessChecking(const std::shared_ptr & access_, const AccessRightsElement & element_) +CachedAccessChecking::CachedAccessChecking(const std::shared_ptr & access_, const AccessRightsElement & element_) : access(access_), element(element_) { } diff --git a/src/Access/CachedAccessChecking.h b/src/Access/CachedAccessChecking.h index e87c28dd823..aaeea6ceddc 100644 --- a/src/Access/CachedAccessChecking.h +++ b/src/Access/CachedAccessChecking.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -13,14 +14,14 @@ class ContextAccess; class CachedAccessChecking { public: - CachedAccessChecking(const std::shared_ptr & access_, AccessFlags access_flags_); - CachedAccessChecking(const std::shared_ptr & access_, const AccessRightsElement & element_); + CachedAccessChecking(const std::shared_ptr & access_, AccessFlags access_flags_); + CachedAccessChecking(const std::shared_ptr & access_, const AccessRightsElement & element_); ~CachedAccessChecking(); bool checkAccess(bool throw_if_denied = true); private: - const std::shared_ptr access; + const std::shared_ptr access; const AccessRightsElement element; bool checked = false; bool result = false; diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 7f0eff2184b..e9f24a8c685 100644 --- 
a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -51,10 +51,11 @@ enum class AccessType : uint8_t M(ALTER_CLEAR_INDEX, "CLEAR INDEX", TABLE, ALTER_INDEX) \ M(ALTER_INDEX, "INDEX", GROUP, ALTER_TABLE) /* allows to execute ALTER ORDER BY or ALTER {ADD|DROP...} INDEX */\ \ - M(ALTER_ADD_STATISTIC, "ALTER ADD STATISTIC", TABLE, ALTER_STATISTIC) \ - M(ALTER_DROP_STATISTIC, "ALTER DROP STATISTIC", TABLE, ALTER_STATISTIC) \ - M(ALTER_MATERIALIZE_STATISTIC, "ALTER MATERIALIZE STATISTIC", TABLE, ALTER_STATISTIC) \ - M(ALTER_STATISTIC, "STATISTIC", GROUP, ALTER_TABLE) /* allows to execute ALTER STATISTIC */\ + M(ALTER_ADD_STATISTICS, "ALTER ADD STATISTIC", TABLE, ALTER_STATISTICS) \ + M(ALTER_DROP_STATISTICS, "ALTER DROP STATISTIC", TABLE, ALTER_STATISTICS) \ + M(ALTER_MODIFY_STATISTICS, "ALTER MODIFY STATISTIC", TABLE, ALTER_STATISTICS) \ + M(ALTER_MATERIALIZE_STATISTICS, "ALTER MATERIALIZE STATISTIC", TABLE, ALTER_STATISTICS) \ + M(ALTER_STATISTICS, "STATISTIC", GROUP, ALTER_TABLE) /* allows to execute ALTER STATISTIC */\ \ M(ALTER_ADD_PROJECTION, "ADD PROJECTION", TABLE, ALTER_PROJECTION) \ M(ALTER_DROP_PROJECTION, "DROP PROJECTION", TABLE, ALTER_PROJECTION) \ diff --git a/src/Access/Common/AuthenticationType.cpp b/src/Access/Common/AuthenticationType.cpp index 2cc126ad9b7..427765b8a79 100644 --- a/src/Access/Common/AuthenticationType.cpp +++ b/src/Access/Common/AuthenticationType.cpp @@ -72,6 +72,11 @@ const AuthenticationTypeInfo & AuthenticationTypeInfo::get(AuthenticationType ty static const auto info = make_info(Keyword::HTTP); return info; } + case AuthenticationType::JWT: + { + static const auto info = make_info(Keyword::JWT); + return info; + } case AuthenticationType::MAX: break; } diff --git a/src/Access/Common/AuthenticationType.h b/src/Access/Common/AuthenticationType.h index a68549aff4c..16f4388bbff 100644 --- a/src/Access/Common/AuthenticationType.h +++ b/src/Access/Common/AuthenticationType.h @@ -41,6 +41,9 @@ enum class 
AuthenticationType : uint8_t /// Authentication through HTTP protocol HTTP, + /// JSON Web Token + JWT, + MAX, }; diff --git a/src/Access/Common/SSLCertificateSubjects.cpp b/src/Access/Common/SSLCertificateSubjects.cpp new file mode 100644 index 00000000000..ca7001a31a2 --- /dev/null +++ b/src/Access/Common/SSLCertificateSubjects.cpp @@ -0,0 +1,95 @@ +#include +#include + +#if USE_SSL +#include +#endif + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +#if USE_SSL +SSLCertificateSubjects extractSSLCertificateSubjects(const Poco::Net::X509Certificate & certificate) +{ + + SSLCertificateSubjects subjects; + if (!certificate.commonName().empty()) + { + subjects.insert(SSLCertificateSubjects::Type::CN, certificate.commonName()); + } + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wused-but-marked-unused" + auto stackof_general_name_deleter = [](void * ptr) { GENERAL_NAMES_free(static_cast(ptr)); }; + std::unique_ptr cert_names( + X509_get_ext_d2i(const_cast(certificate.certificate()), NID_subject_alt_name, nullptr, nullptr), + stackof_general_name_deleter); + + if (STACK_OF(GENERAL_NAME) * names = static_cast(cert_names.get())) + { + for (int i = 0; i < sk_GENERAL_NAME_num(names); ++i) + { + const GENERAL_NAME * name = sk_GENERAL_NAME_value(names, i); + if (name->type == GEN_DNS || name->type == GEN_URI) + { + const char * data = reinterpret_cast(ASN1_STRING_get0_data(name->d.ia5)); + std::size_t len = ASN1_STRING_length(name->d.ia5); + std::string subject = (name->type == GEN_DNS ? 
"DNS:" : "URI:") + std::string(data, len); + subjects.insert(SSLCertificateSubjects::Type::SAN, std::move(subject)); + } + } + } + +#pragma clang diagnostic pop + return subjects; +} +#endif + + +void SSLCertificateSubjects::insert(const String & subject_type_, String && subject) +{ + insert(parseSSLCertificateSubjectType(subject_type_), std::move(subject)); +} + +void SSLCertificateSubjects::insert(Type subject_type_, String && subject) +{ + subjects[static_cast(subject_type_)].insert(std::move(subject)); +} + +SSLCertificateSubjects::Type parseSSLCertificateSubjectType(const String & type_) +{ + if (type_ == "CN") + return SSLCertificateSubjects::Type::CN; + if (type_ == "SAN") + return SSLCertificateSubjects::Type::SAN; + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown SSL Certificate Subject Type: {}", type_); +} + +String toString(SSLCertificateSubjects::Type type_) +{ + switch (type_) + { + case SSLCertificateSubjects::Type::CN: + return "CN"; + case SSLCertificateSubjects::Type::SAN: + return "SAN"; + } +} + +bool operator==(const SSLCertificateSubjects & lhs, const SSLCertificateSubjects & rhs) +{ + for (SSLCertificateSubjects::Type type : {SSLCertificateSubjects::Type::CN, SSLCertificateSubjects::Type::SAN}) + { + if (lhs.at(type) != rhs.at(type)) + return false; + } + return true; +} + +} + diff --git a/src/Access/Common/SSLCertificateSubjects.h b/src/Access/Common/SSLCertificateSubjects.h new file mode 100644 index 00000000000..ec11714d48a --- /dev/null +++ b/src/Access/Common/SSLCertificateSubjects.h @@ -0,0 +1,48 @@ +#pragma once + +#include "config.h" +#include +#include + +#if USE_SSL +# include +#endif + +namespace DB +{ +class SSLCertificateSubjects +{ +public: + using container = boost::container::flat_set; + enum class Type + { + CN, + SAN + }; + +private: + std::array subjects; + +public: + inline const container & at(Type type_) const { return subjects[static_cast(type_)]; } + inline bool empty() + { + for (auto & subject_list : subjects) 
+ { + if (!subject_list.empty()) + return false; + } + return true; + } + void insert(const String & subject_type_, String && subject); + void insert(Type type_, String && subject); + friend bool operator==(const SSLCertificateSubjects & lhs, const SSLCertificateSubjects & rhs); +}; + +String toString(SSLCertificateSubjects::Type type_); +SSLCertificateSubjects::Type parseSSLCertificateSubjectType(const String & type_); + +#if USE_SSL +SSLCertificateSubjects extractSSLCertificateSubjects(const Poco::Net::X509Certificate & certificate); +#endif +} diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 2a658d7aaa2..a2807ecc5ea 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace DB @@ -271,7 +272,7 @@ namespace std::shared_ptr ContextAccess::fromContext(const ContextPtr & context) { - return context->getAccess(); + return ContextAccessWrapper::fromContext(context)->getAccess(); } @@ -360,10 +361,13 @@ void ContextAccess::setUser(const UserPtr & user_) const subscription_for_roles_changes.reset(); enabled_roles = access_control->getEnabledRoles(current_roles, current_roles_with_admin_option); - subscription_for_roles_changes = enabled_roles->subscribeForChanges([this](const std::shared_ptr & roles_info_) + subscription_for_roles_changes = enabled_roles->subscribeForChanges([weak_ptr = weak_from_this()](const std::shared_ptr & roles_info_) { - std::lock_guard lock{mutex}; - setRolesInfo(roles_info_); + auto ptr = weak_ptr.lock(); + if (!ptr) + return; + std::lock_guard lock{ptr->mutex}; + ptr->setRolesInfo(roles_info_); }); setRolesInfo(enabled_roles->getRolesInfo()); @@ -557,7 +561,7 @@ std::shared_ptr ContextAccess::getAccessRightsWithImplicit() template -bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... args) const +bool ContextAccess::checkAccessImplHelper(const ContextPtr & context, AccessFlags flags, const Args &... 
args) const { if (user_was_dropped) { @@ -570,8 +574,10 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg if (params.full_access) return true; - auto access_granted = [] + auto access_granted = [&] { + if constexpr (throw_if_denied) + context->addQueryPrivilegesInfo(AccessRightsElement{flags, args...}.toStringWithoutOptions(), true); return true; }; @@ -580,7 +586,10 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg FmtArgs && ...fmt_args [[maybe_unused]]) { if constexpr (throw_if_denied) + { + context->addQueryPrivilegesInfo(AccessRightsElement{flags, args...}.toStringWithoutOptions(), false); throw Exception(error_code, std::move(fmt_string), getUserName(), std::forward(fmt_args)...); + } return false; }; @@ -683,102 +692,102 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg } template -bool ContextAccess::checkAccessImpl(const AccessFlags & flags) const +bool ContextAccess::checkAccessImpl(const ContextPtr & context, const AccessFlags & flags) const { - return checkAccessImplHelper(flags); + return checkAccessImplHelper(context, flags); } template -bool ContextAccess::checkAccessImpl(const AccessFlags & flags, std::string_view database, const Args &... args) const +bool ContextAccess::checkAccessImpl(const ContextPtr & context, const AccessFlags & flags, std::string_view database, const Args &... args) const { - return checkAccessImplHelper(flags, database.empty() ? params.current_database : database, args...); + return checkAccessImplHelper(context, flags, database.empty() ? 
params.current_database : database, args...); } template -bool ContextAccess::checkAccessImplHelper(const AccessRightsElement & element) const +bool ContextAccess::checkAccessImplHelper(const ContextPtr & context, const AccessRightsElement & element) const { assert(!element.grant_option || grant_option); if (element.isGlobalWithParameter()) { if (element.any_parameter) - return checkAccessImpl(element.access_flags); + return checkAccessImpl(context, element.access_flags); else - return checkAccessImpl(element.access_flags, element.parameter); + return checkAccessImpl(context, element.access_flags, element.parameter); } else if (element.any_database) - return checkAccessImpl(element.access_flags); + return checkAccessImpl(context, element.access_flags); else if (element.any_table) - return checkAccessImpl(element.access_flags, element.database); + return checkAccessImpl(context, element.access_flags, element.database); else if (element.any_column) - return checkAccessImpl(element.access_flags, element.database, element.table); + return checkAccessImpl(context, element.access_flags, element.database, element.table); else - return checkAccessImpl(element.access_flags, element.database, element.table, element.columns); + return checkAccessImpl(context, element.access_flags, element.database, element.table, element.columns); } template -bool ContextAccess::checkAccessImpl(const AccessRightsElement & element) const +bool ContextAccess::checkAccessImpl(const ContextPtr & context, const AccessRightsElement & element) const { if constexpr (grant_option) { - return checkAccessImplHelper(element); + return checkAccessImplHelper(context, element); } else { if (element.grant_option) - return checkAccessImplHelper(element); + return checkAccessImplHelper(context, element); else - return checkAccessImplHelper(element); + return checkAccessImplHelper(context, element); } } template -bool ContextAccess::checkAccessImpl(const AccessRightsElements & elements) const +bool 
ContextAccess::checkAccessImpl(const ContextPtr & context, const AccessRightsElements & elements) const { for (const auto & element : elements) - if (!checkAccessImpl(element)) + if (!checkAccessImpl(context, element)) return false; return true; } -bool ContextAccess::isGranted(const AccessFlags & flags) const { return checkAccessImpl(flags); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(flags, database); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(flags, database, table); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return checkAccessImpl(flags, database, table, column); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return checkAccessImpl(flags, database, table, columns); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return checkAccessImpl(flags, database, table, columns); } -bool ContextAccess::isGranted(const AccessRightsElement & element) const { return checkAccessImpl(element); } -bool ContextAccess::isGranted(const AccessRightsElements & elements) const { return checkAccessImpl(elements); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags) const { return checkAccessImpl(context, flags); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(context, flags, database); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(context, flags, database, table); } +bool 
ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return checkAccessImpl(context, flags, database, table, column); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return checkAccessImpl(context, flags, database, table, columns); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return checkAccessImpl(context, flags, database, table, columns); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessRightsElement & element) const { return checkAccessImpl(context, element); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessRightsElements & elements) const { return checkAccessImpl(context, elements); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags) const { return checkAccessImpl(flags); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(flags, database); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(flags, database, table); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return checkAccessImpl(flags, database, table, column); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return checkAccessImpl(flags, database, table, columns); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return 
checkAccessImpl(flags, database, table, columns); } -bool ContextAccess::hasGrantOption(const AccessRightsElement & element) const { return checkAccessImpl(element); } -bool ContextAccess::hasGrantOption(const AccessRightsElements & elements) const { return checkAccessImpl(elements); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags) const { return checkAccessImpl(context, flags); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(context, flags, database); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(context, flags, database, table); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return checkAccessImpl(context, flags, database, table, column); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return checkAccessImpl(context, flags, database, table, columns); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return checkAccessImpl(context, flags, database, table, columns); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessRightsElement & element) const { return checkAccessImpl(context, element); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessRightsElements & elements) const { return checkAccessImpl(context, elements); } -void ContextAccess::checkAccess(const AccessFlags & flags) const { checkAccessImpl(flags); } -void ContextAccess::checkAccess(const 
AccessFlags & flags, std::string_view database) const { checkAccessImpl(flags, database); } -void ContextAccess::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const { checkAccessImpl(flags, database, table); } -void ContextAccess::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { checkAccessImpl(flags, database, table, column); } -void ContextAccess::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { checkAccessImpl(flags, database, table, columns); } -void ContextAccess::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { checkAccessImpl(flags, database, table, columns); } -void ContextAccess::checkAccess(const AccessRightsElement & element) const { checkAccessImpl(element); } -void ContextAccess::checkAccess(const AccessRightsElements & elements) const { checkAccessImpl(elements); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags) const { checkAccessImpl(context, flags); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const { checkAccessImpl(context, flags, database); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const { checkAccessImpl(context, flags, database, table); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { checkAccessImpl(context, flags, database, table, column); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { checkAccessImpl(context, flags, 
database, table, columns); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { checkAccessImpl(context, flags, database, table, columns); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessRightsElement & element) const { checkAccessImpl(context, element); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessRightsElements & elements) const { checkAccessImpl(context, elements); } -void ContextAccess::checkGrantOption(const AccessFlags & flags) const { checkAccessImpl(flags); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database) const { checkAccessImpl(flags, database); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const { checkAccessImpl(flags, database, table); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { checkAccessImpl(flags, database, table, column); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { checkAccessImpl(flags, database, table, columns); } -void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { checkAccessImpl(flags, database, table, columns); } -void ContextAccess::checkGrantOption(const AccessRightsElement & element) const { checkAccessImpl(element); } -void ContextAccess::checkGrantOption(const AccessRightsElements & elements) const { checkAccessImpl(elements); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags) const { checkAccessImpl(context, flags); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const 
AccessFlags & flags, std::string_view database) const { checkAccessImpl(context, flags, database); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const { checkAccessImpl(context, flags, database, table); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { checkAccessImpl(context, flags, database, table, column); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { checkAccessImpl(context, flags, database, table, columns); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { checkAccessImpl(context, flags, database, table, columns); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessRightsElement & element) const { checkAccessImpl(context, element); } +void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessRightsElements & elements) const { checkAccessImpl(context, elements); } template -bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const GetNameFunction & get_name_function) const +bool ContextAccess::checkAdminOptionImplHelper(const ContextPtr & context, const Container & role_ids, const GetNameFunction & get_name_function) const { auto show_error = [](int error_code [[maybe_unused]], FormatStringHelper fmt_string [[maybe_unused]], @@ -801,7 +810,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const if (!std::size(role_ids)) return true; - if (isGranted(AccessType::ROLE_ADMIN)) + if (isGranted(context, AccessType::ROLE_ADMIN)) return true; auto info = getRolesInfo(); @@ -837,54 
+846,54 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const } template -bool ContextAccess::checkAdminOptionImpl(const UUID & role_id) const +bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id) const { - return checkAdminOptionImplHelper(to_array(role_id), [this](const UUID & id, size_t) { return access_control->tryReadName(id); }); + return checkAdminOptionImplHelper(context, to_array(role_id), [this](const UUID & id, size_t) { return access_control->tryReadName(id); }); } template -bool ContextAccess::checkAdminOptionImpl(const UUID & role_id, const String & role_name) const +bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id, const String & role_name) const { - return checkAdminOptionImplHelper(to_array(role_id), [&role_name](const UUID &, size_t) { return std::optional{role_name}; }); + return checkAdminOptionImplHelper(context, to_array(role_id), [&role_name](const UUID &, size_t) { return std::optional{role_name}; }); } template -bool ContextAccess::checkAdminOptionImpl(const UUID & role_id, const std::unordered_map & names_of_roles) const +bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id, const std::unordered_map & names_of_roles) const { - return checkAdminOptionImplHelper(to_array(role_id), [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? it->second : std::optional{}; }); + return checkAdminOptionImplHelper(context, to_array(role_id), [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? 
it->second : std::optional{}; }); } template -bool ContextAccess::checkAdminOptionImpl(const std::vector & role_ids) const +bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const std::vector & role_ids) const { - return checkAdminOptionImplHelper(role_ids, [this](const UUID & id, size_t) { return access_control->tryReadName(id); }); + return checkAdminOptionImplHelper(context, role_ids, [this](const UUID & id, size_t) { return access_control->tryReadName(id); }); } template -bool ContextAccess::checkAdminOptionImpl(const std::vector & role_ids, const Strings & names_of_roles) const +bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const std::vector & role_ids, const Strings & names_of_roles) const { - return checkAdminOptionImplHelper(role_ids, [&names_of_roles](const UUID &, size_t i) { return std::optional{names_of_roles[i]}; }); + return checkAdminOptionImplHelper(context, role_ids, [&names_of_roles](const UUID &, size_t i) { return std::optional{names_of_roles[i]}; }); } template -bool ContextAccess::checkAdminOptionImpl(const std::vector & role_ids, const std::unordered_map & names_of_roles) const +bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const std::vector & role_ids, const std::unordered_map & names_of_roles) const { - return checkAdminOptionImplHelper(role_ids, [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? it->second : std::optional{}; }); + return checkAdminOptionImplHelper(context, role_ids, [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? 
it->second : std::optional{}; }); } -bool ContextAccess::hasAdminOption(const UUID & role_id) const { return checkAdminOptionImpl(role_id); } -bool ContextAccess::hasAdminOption(const UUID & role_id, const String & role_name) const { return checkAdminOptionImpl(role_id, role_name); } -bool ContextAccess::hasAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const { return checkAdminOptionImpl(role_id, names_of_roles); } -bool ContextAccess::hasAdminOption(const std::vector & role_ids) const { return checkAdminOptionImpl(role_ids); } -bool ContextAccess::hasAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const { return checkAdminOptionImpl(role_ids, names_of_roles); } -bool ContextAccess::hasAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const { return checkAdminOptionImpl(role_ids, names_of_roles); } +bool ContextAccess::hasAdminOption(const ContextPtr & context, const UUID & role_id) const { return checkAdminOptionImpl(context, role_id); } +bool ContextAccess::hasAdminOption(const ContextPtr & context, const UUID & role_id, const String & role_name) const { return checkAdminOptionImpl(context, role_id, role_name); } +bool ContextAccess::hasAdminOption(const ContextPtr & context, const UUID & role_id, const std::unordered_map & names_of_roles) const { return checkAdminOptionImpl(context, role_id, names_of_roles); } +bool ContextAccess::hasAdminOption(const ContextPtr & context, const std::vector & role_ids) const { return checkAdminOptionImpl(context, role_ids); } +bool ContextAccess::hasAdminOption(const ContextPtr & context, const std::vector & role_ids, const Strings & names_of_roles) const { return checkAdminOptionImpl(context, role_ids, names_of_roles); } +bool ContextAccess::hasAdminOption(const ContextPtr & context, const std::vector & role_ids, const std::unordered_map & names_of_roles) const { return checkAdminOptionImpl(context, role_ids, names_of_roles); } -void 
ContextAccess::checkAdminOption(const UUID & role_id) const { checkAdminOptionImpl(role_id); } -void ContextAccess::checkAdminOption(const UUID & role_id, const String & role_name) const { checkAdminOptionImpl(role_id, role_name); } -void ContextAccess::checkAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const { checkAdminOptionImpl(role_id, names_of_roles); } -void ContextAccess::checkAdminOption(const std::vector & role_ids) const { checkAdminOptionImpl(role_ids); } -void ContextAccess::checkAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const { checkAdminOptionImpl(role_ids, names_of_roles); } -void ContextAccess::checkAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const { checkAdminOptionImpl(role_ids, names_of_roles); } +void ContextAccess::checkAdminOption(const ContextPtr & context, const UUID & role_id) const { checkAdminOptionImpl(context, role_id); } +void ContextAccess::checkAdminOption(const ContextPtr & context, const UUID & role_id, const String & role_name) const { checkAdminOptionImpl(context, role_id, role_name); } +void ContextAccess::checkAdminOption(const ContextPtr & context, const UUID & role_id, const std::unordered_map & names_of_roles) const { checkAdminOptionImpl(context, role_id, names_of_roles); } +void ContextAccess::checkAdminOption(const ContextPtr & context, const std::vector & role_ids) const { checkAdminOptionImpl(context, role_ids); } +void ContextAccess::checkAdminOption(const ContextPtr & context, const std::vector & role_ids, const Strings & names_of_roles) const { checkAdminOptionImpl(context, role_ids, names_of_roles); } +void ContextAccess::checkAdminOption(const ContextPtr & context, const std::vector & role_ids, const std::unordered_map & names_of_roles) const { checkAdminOptionImpl(context, role_ids, names_of_roles); } void ContextAccess::checkGranteeIsAllowed(const UUID & grantee_id, const IAccessEntity & grantee) const @@ 
-916,4 +925,10 @@ void ContextAccess::checkGranteesAreAllowed(const std::vector & grantee_id } } +std::shared_ptr ContextAccessWrapper::fromContext(const ContextPtr & context) +{ + return context->getAccess(); +} + + } diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 237c423d261..465932af1d3 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -4,9 +4,12 @@ #include #include #include +#include +#include #include #include #include +#include #include #include #include @@ -71,59 +74,59 @@ public: /// Checks if a specified access is granted, and throws an exception if not. /// Empty database means the current database. - void checkAccess(const AccessFlags & flags) const; - void checkAccess(const AccessFlags & flags, std::string_view database) const; - void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const; - void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; - void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; - void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; - void checkAccess(const AccessRightsElement & element) const; - void checkAccess(const AccessRightsElements & elements) const; + void checkAccess(const ContextPtr & context, const AccessFlags & flags) const; + void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const; + void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const; + void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; + void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view 
database, std::string_view table, const std::vector & columns) const; + void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; + void checkAccess(const ContextPtr & context, const AccessRightsElement & element) const; + void checkAccess(const ContextPtr & context, const AccessRightsElements & elements) const; - void checkGrantOption(const AccessFlags & flags) const; - void checkGrantOption(const AccessFlags & flags, std::string_view database) const; - void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const; - void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; - void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; - void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; - void checkGrantOption(const AccessRightsElement & element) const; - void checkGrantOption(const AccessRightsElements & elements) const; + void checkGrantOption(const ContextPtr & context, const AccessFlags & flags) const; + void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const; + void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const; + void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; + void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; + void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view 
table, const Strings & columns) const; + void checkGrantOption(const ContextPtr & context, const AccessRightsElement & element) const; + void checkGrantOption(const ContextPtr & context, const AccessRightsElements & elements) const; /// Checks if a specified access is granted, and returns false if not. /// Empty database means the current database. - bool isGranted(const AccessFlags & flags) const; - bool isGranted(const AccessFlags & flags, std::string_view database) const; - bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table) const; - bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; - bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; - bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; - bool isGranted(const AccessRightsElement & element) const; - bool isGranted(const AccessRightsElements & elements) const; + bool isGranted(const ContextPtr & context, const AccessFlags & flags) const; + bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const; + bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const; + bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; + bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; + bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; + bool isGranted(const ContextPtr & context, const AccessRightsElement & element) const; + bool isGranted(const ContextPtr & 
context, const AccessRightsElements & elements) const; - bool hasGrantOption(const AccessFlags & flags) const; - bool hasGrantOption(const AccessFlags & flags, std::string_view database) const; - bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const; - bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; - bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; - bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; - bool hasGrantOption(const AccessRightsElement & element) const; - bool hasGrantOption(const AccessRightsElements & elements) const; + bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags) const; + bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const; + bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const; + bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const; + bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const; + bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const; + bool hasGrantOption(const ContextPtr & context, const AccessRightsElement & element) const; + bool hasGrantOption(const ContextPtr & context, const AccessRightsElements & elements) const; /// Checks if a specified role is granted with admin option, and throws an exception if not. 
- void checkAdminOption(const UUID & role_id) const; - void checkAdminOption(const UUID & role_id, const String & role_name) const; - void checkAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const; - void checkAdminOption(const std::vector & role_ids) const; - void checkAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const; - void checkAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const; + void checkAdminOption(const ContextPtr & context, const UUID & role_id) const; + void checkAdminOption(const ContextPtr & context, const UUID & role_id, const String & role_name) const; + void checkAdminOption(const ContextPtr & context, const UUID & role_id, const std::unordered_map & names_of_roles) const; + void checkAdminOption(const ContextPtr & context, const std::vector & role_ids) const; + void checkAdminOption(const ContextPtr & context, const std::vector & role_ids, const Strings & names_of_roles) const; + void checkAdminOption(const ContextPtr & context, const std::vector & role_ids, const std::unordered_map & names_of_roles) const; /// Checks if a specified role is granted with admin option, and returns false if not. 
- bool hasAdminOption(const UUID & role_id) const; - bool hasAdminOption(const UUID & role_id, const String & role_name) const; - bool hasAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const; - bool hasAdminOption(const std::vector & role_ids) const; - bool hasAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const; - bool hasAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const; + bool hasAdminOption(const ContextPtr & context, const UUID & role_id) const; + bool hasAdminOption(const ContextPtr & context, const UUID & role_id, const String & role_name) const; + bool hasAdminOption(const ContextPtr & context, const UUID & role_id, const std::unordered_map & names_of_roles) const; + bool hasAdminOption(const ContextPtr & context, const std::vector & role_ids) const; + bool hasAdminOption(const ContextPtr & context, const std::vector & role_ids, const Strings & names_of_roles) const; + bool hasAdminOption(const ContextPtr & context, const std::vector & role_ids, const std::unordered_map & names_of_roles) const; /// Checks if a grantee is allowed for the current user, throws an exception if not. void checkGranteeIsAllowed(const UUID & grantee_id, const IAccessEntity & grantee) const; @@ -142,43 +145,43 @@ private: void calculateAccessRights() const TSA_REQUIRES(mutex); template - bool checkAccessImpl(const AccessFlags & flags) const; + bool checkAccessImpl(const ContextPtr & context, const AccessFlags & flags) const; template - bool checkAccessImpl(const AccessFlags & flags, std::string_view database, const Args &... args) const; + bool checkAccessImpl(const ContextPtr & context, const AccessFlags & flags, std::string_view database, const Args &... 
args) const; template - bool checkAccessImpl(const AccessRightsElement & element) const; + bool checkAccessImpl(const ContextPtr & context, const AccessRightsElement & element) const; template - bool checkAccessImpl(const AccessRightsElements & elements) const; + bool checkAccessImpl(const ContextPtr & context, const AccessRightsElements & elements) const; template - bool checkAccessImplHelper(AccessFlags flags, const Args &... args) const; + bool checkAccessImplHelper(const ContextPtr & context, AccessFlags flags, const Args &... args) const; template - bool checkAccessImplHelper(const AccessRightsElement & element) const; + bool checkAccessImplHelper(const ContextPtr & context, const AccessRightsElement & element) const; template - bool checkAdminOptionImpl(const UUID & role_id) const; + bool checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id) const; template - bool checkAdminOptionImpl(const UUID & role_id, const String & role_name) const; + bool checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id, const String & role_name) const; template - bool checkAdminOptionImpl(const UUID & role_id, const std::unordered_map & names_of_roles) const; + bool checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id, const std::unordered_map & names_of_roles) const; template - bool checkAdminOptionImpl(const std::vector & role_ids) const; + bool checkAdminOptionImpl(const ContextPtr & context, const std::vector & role_ids) const; template - bool checkAdminOptionImpl(const std::vector & role_ids, const Strings & names_of_roles) const; + bool checkAdminOptionImpl(const ContextPtr & context, const std::vector & role_ids, const Strings & names_of_roles) const; template - bool checkAdminOptionImpl(const std::vector & role_ids, const std::unordered_map & names_of_roles) const; + bool checkAdminOptionImpl(const ContextPtr & context, const std::vector & role_ids, const std::unordered_map & names_of_roles) const; template - bool 
checkAdminOptionImplHelper(const Container & role_ids, const GetNameFunction & get_name_function) const; + bool checkAdminOptionImplHelper(const ContextPtr & context, const Container & role_ids, const GetNameFunction & get_name_function) const; const AccessControl * access_control = nullptr; const Params params; @@ -203,4 +206,115 @@ private: mutable std::shared_ptr enabled_settings TSA_GUARDED_BY(mutex); }; +/// This wrapper was added to be able to pass the current context to the access +/// without the need to change the signature and all calls to the ContextAccess itself. +/// Right now a context is used to store privileges that are checked for a query, +/// and might be useful for something else in the future as well. +class ContextAccessWrapper : public std::enable_shared_from_this +{ +public: + using ContextAccessPtr = std::shared_ptr; + + ContextAccessWrapper(const ContextAccessPtr & access_, const ContextPtr & context_): access(access_), context(context_) {} + ~ContextAccessWrapper() = default; + + static std::shared_ptr fromContext(const ContextPtr & context); + + const ContextAccess::Params & getParams() const { return access->getParams(); } + + const ContextAccessPtr & getAccess() const { return access; } + + /// Returns the current user. Throws if user is nullptr. + ALWAYS_INLINE UserPtr getUser() const { return access->getUser(); } + /// Same as above, but can return nullptr. + ALWAYS_INLINE UserPtr tryGetUser() const { return access->tryGetUser(); } + ALWAYS_INLINE String getUserName() const { return access->getUserName(); } + ALWAYS_INLINE std::optional getUserID() const { return access->getUserID(); } + + /// Returns information about current and enabled roles. + ALWAYS_INLINE std::shared_ptr getRolesInfo() const { return access->getRolesInfo(); } + + /// Returns the row policy filter for a specified table. + /// The function returns nullptr if there is no filter to apply. 
+ ALWAYS_INLINE RowPolicyFilterPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const { return access->getRowPolicyFilter(database, table_name, filter_type); } + + /// Returns the quota to track resource consumption. + ALWAYS_INLINE std::shared_ptr getQuota() const { return access->getQuota(); } + ALWAYS_INLINE std::optional getQuotaUsage() const { return access->getQuotaUsage(); } + + /// Returns the default settings, i.e. the settings which should be applied on user's login. + ALWAYS_INLINE SettingsChanges getDefaultSettings() const { return access->getDefaultSettings(); } + ALWAYS_INLINE std::shared_ptr getDefaultProfileInfo() const { return access->getDefaultProfileInfo(); } + + /// Returns the current access rights. + ALWAYS_INLINE std::shared_ptr getAccessRights() const { return access->getAccessRights(); } + ALWAYS_INLINE std::shared_ptr getAccessRightsWithImplicit() const { return access->getAccessRightsWithImplicit(); } + + /// Checks if a specified access is granted, and throws an exception if not. + /// Empty database means the current database. 
+ ALWAYS_INLINE void checkAccess(const AccessFlags & flags) const { access->checkAccess(context, flags); } + ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database) const { access->checkAccess(context, flags, database); } + ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const { access->checkAccess(context, flags, database, table); } + ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { access->checkAccess(context, flags, database, table, column); } + ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { access->checkAccess(context, flags, database, table, columns); } + ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { access->checkAccess(context, flags, database, table, columns); } + ALWAYS_INLINE void checkAccess(const AccessRightsElement & element) const { access->checkAccess(context, element); } + ALWAYS_INLINE void checkAccess(const AccessRightsElements & elements) const { access->checkAccess(context, elements); } + + ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags) const { access->checkGrantOption(context, flags); } + ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags, std::string_view database) const { access->checkGrantOption(context, flags, database); } + ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const { access->checkGrantOption(context, flags, database, table); } + ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { access->checkGrantOption(context, flags, database, table, column); } + ALWAYS_INLINE void 
checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { access->checkGrantOption(context, flags, database, table, columns); } + ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { access->checkGrantOption(context, flags, database, table, columns); } + ALWAYS_INLINE void checkGrantOption(const AccessRightsElement & element) const { access->checkGrantOption(context, element); } + ALWAYS_INLINE void checkGrantOption(const AccessRightsElements & elements) const { access->checkGrantOption(context, elements); } + + /// Checks if a specified access is granted, and returns false if not. + /// Empty database means the current database. + ALWAYS_INLINE bool isGranted(const AccessFlags & flags) const { return access->isGranted(context, flags); } + ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database) const { return access->isGranted(context, flags, database); } + ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table) const { return access->isGranted(context, flags, database, table); } + ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return access->isGranted(context, flags, database, table, column); } + ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return access->isGranted(context, flags, database, table, columns); } + ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return access->isGranted(context, flags, database, table, columns); } + ALWAYS_INLINE bool isGranted(const AccessRightsElement & element) const { return access->isGranted(context, element); 
} + ALWAYS_INLINE bool isGranted(const AccessRightsElements & elements) const { return access->isGranted(context, elements); } + + ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags) const { return access->hasGrantOption(context, flags); } + ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database) const { return access->hasGrantOption(context, flags, database); } + ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const { return access->hasGrantOption(context, flags, database, table); } + ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return access->hasGrantOption(context, flags, database, table, column); } + ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return access->hasGrantOption(context, flags, database, table, columns); } + ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return access->hasGrantOption(context, flags, database, table, columns); } + ALWAYS_INLINE bool hasGrantOption(const AccessRightsElement & element) const { return access->hasGrantOption(context, element); } + ALWAYS_INLINE bool hasGrantOption(const AccessRightsElements & elements) const { return access->hasGrantOption(context, elements); } + + /// Checks if a specified role is granted with admin option, and throws an exception if not. 
+ ALWAYS_INLINE void checkAdminOption(const UUID & role_id) const { access->checkAdminOption(context, role_id); } + ALWAYS_INLINE void checkAdminOption(const UUID & role_id, const String & role_name) const { access->checkAdminOption(context, role_id, role_name); } + ALWAYS_INLINE void checkAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const { access->checkAdminOption(context, role_id, names_of_roles); } + ALWAYS_INLINE void checkAdminOption(const std::vector & role_ids) const { access->checkAdminOption(context, role_ids); } + ALWAYS_INLINE void checkAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const { access->checkAdminOption(context, role_ids, names_of_roles); } + ALWAYS_INLINE void checkAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const { access->checkAdminOption(context, role_ids, names_of_roles); } + + /// Checks if a specified role is granted with admin option, and returns false if not. 
+ ALWAYS_INLINE bool hasAdminOption(const UUID & role_id) const { return access->hasAdminOption(context, role_id); } + ALWAYS_INLINE bool hasAdminOption(const UUID & role_id, const String & role_name) const { return access->hasAdminOption(context, role_id, role_name); } + ALWAYS_INLINE bool hasAdminOption(const UUID & role_id, const std::unordered_map & names_of_roles) const { return access->hasAdminOption(context, role_id, names_of_roles); } + ALWAYS_INLINE bool hasAdminOption(const std::vector & role_ids) const { return access->hasAdminOption(context, role_ids); } + ALWAYS_INLINE bool hasAdminOption(const std::vector & role_ids, const Strings & names_of_roles) const { return access->hasAdminOption(context, role_ids, names_of_roles); } + ALWAYS_INLINE bool hasAdminOption(const std::vector & role_ids, const std::unordered_map & names_of_roles) const { return access->hasAdminOption(context, role_ids, names_of_roles); } + + /// Checks if a grantee is allowed for the current user, throws an exception if not. + ALWAYS_INLINE void checkGranteeIsAllowed(const UUID & grantee_id, const IAccessEntity & grantee) const { access->checkGranteeIsAllowed(grantee_id, grantee); } + /// Checks if grantees are allowed for the current user, throws an exception if not. 
+ ALWAYS_INLINE void checkGranteesAreAllowed(const std::vector & grantee_ids) const { access->checkGranteesAreAllowed(grantee_ids); } + +private: + ContextAccessPtr access; + ContextPtr context; +}; + + } diff --git a/src/Access/Credentials.cpp b/src/Access/Credentials.cpp index f9886c0182b..f01700b6e46 100644 --- a/src/Access/Credentials.cpp +++ b/src/Access/Credentials.cpp @@ -1,7 +1,7 @@ #include +#include #include - namespace DB { @@ -48,18 +48,18 @@ void AlwaysAllowCredentials::setUserName(const String & user_name_) user_name = user_name_; } -SSLCertificateCredentials::SSLCertificateCredentials(const String & user_name_, const String & common_name_) +SSLCertificateCredentials::SSLCertificateCredentials(const String & user_name_, SSLCertificateSubjects && subjects_) : Credentials(user_name_) - , common_name(common_name_) + , certificate_subjects(subjects_) { is_ready = true; } -const String & SSLCertificateCredentials::getCommonName() const +const SSLCertificateSubjects & SSLCertificateCredentials::getSSLCertificateSubjects() const { if (!isReady()) throwNotReady(); - return common_name; + return certificate_subjects; } BasicCredentials::BasicCredentials() diff --git a/src/Access/Credentials.h b/src/Access/Credentials.h index d04f8a66541..5f6b0269eef 100644 --- a/src/Access/Credentials.h +++ b/src/Access/Credentials.h @@ -1,6 +1,8 @@ #pragma once #include +#include +#include #include #include "config.h" @@ -42,11 +44,11 @@ class SSLCertificateCredentials : public Credentials { public: - explicit SSLCertificateCredentials(const String & user_name_, const String & common_name_); - const String & getCommonName() const; + explicit SSLCertificateCredentials(const String & user_name_, SSLCertificateSubjects && subjects_); + const SSLCertificateSubjects & getSSLCertificateSubjects() const; private: - String common_name; + SSLCertificateSubjects certificate_subjects; }; class BasicCredentials diff --git a/src/Access/DiskAccessStorage.cpp 
b/src/Access/DiskAccessStorage.cpp index fe698b32816..ee422f7d8ff 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -194,11 +194,9 @@ DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String DiskAccessStorage::~DiskAccessStorage() { - stopListsWritingThread(); - try { - writeLists(); + DiskAccessStorage::shutdown(); } catch (...) { @@ -207,6 +205,17 @@ DiskAccessStorage::~DiskAccessStorage() } +void DiskAccessStorage::shutdown() +{ + stopListsWritingThread(); + + { + std::lock_guard lock{mutex}; + writeLists(); + } +} + + String DiskAccessStorage::getStorageParamsJSON() const { std::lock_guard lock{mutex}; diff --git a/src/Access/DiskAccessStorage.h b/src/Access/DiskAccessStorage.h index 5d94008b34f..38172b26970 100644 --- a/src/Access/DiskAccessStorage.h +++ b/src/Access/DiskAccessStorage.h @@ -18,6 +18,8 @@ public: DiskAccessStorage(const String & storage_name_, const String & directory_path_, AccessChangesNotifier & changes_notifier_, bool readonly_, bool allow_backup_); ~DiskAccessStorage() override; + void shutdown() override; + const char * getStorageType() const override { return STORAGE_TYPE; } String getStorageParamsJSON() const override; diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index 4f980bf9212..e88b1601f32 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -44,6 +44,11 @@ public: explicit IAccessStorage(const String & storage_name_) : storage_name(storage_name_) {} virtual ~IAccessStorage() = default; + /// If the AccessStorage has to do some complicated work when destroying - do it in advance. + /// For example, if the AccessStorage contains any threads for background work - ask them to complete and wait for completion. + /// By default, does nothing. + virtual void shutdown() {} + /// Returns the name of this storage. 
const String & getStorageName() const { return storage_name; } virtual const char * getStorageType() const = 0; diff --git a/src/Access/MultipleAccessStorage.cpp b/src/Access/MultipleAccessStorage.cpp index a8b508202b5..fda6601e4c6 100644 --- a/src/Access/MultipleAccessStorage.cpp +++ b/src/Access/MultipleAccessStorage.cpp @@ -34,11 +34,23 @@ MultipleAccessStorage::MultipleAccessStorage(const String & storage_name_) MultipleAccessStorage::~MultipleAccessStorage() { - /// It's better to remove the storages in the reverse order because they could depend on each other somehow. + try + { + MultipleAccessStorage::shutdown(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +void MultipleAccessStorage::shutdown() +{ + /// It's better to shutdown the storages in the reverse order because they could depend on each other somehow. const auto storages = getStoragesPtr(); for (const auto & storage : *storages | boost::adaptors::reversed) { - removeStorage(storage); + storage->shutdown(); } } @@ -72,6 +84,16 @@ void MultipleAccessStorage::removeStorage(const StoragePtr & storage_to_remove) ids_cache.clear(); } +void MultipleAccessStorage::removeAllStorages() +{ + /// It's better to remove the storages in the reverse order because they could depend on each other somehow. 
+ const auto storages = getStoragesPtr(); + for (const auto & storage : *storages | boost::adaptors::reversed) + { + removeStorage(storage); + } +} + std::vector MultipleAccessStorage::getStorages() { return *getStoragesPtr(); diff --git a/src/Access/MultipleAccessStorage.h b/src/Access/MultipleAccessStorage.h index 005e6e2b9cd..e1543c59b67 100644 --- a/src/Access/MultipleAccessStorage.h +++ b/src/Access/MultipleAccessStorage.h @@ -21,6 +21,8 @@ public: explicit MultipleAccessStorage(const String & storage_name_ = STORAGE_TYPE); ~MultipleAccessStorage() override; + void shutdown() override; + const char * getStorageType() const override { return STORAGE_TYPE; } bool isReadOnly() const override; bool isReadOnly(const UUID & id) const override; @@ -32,6 +34,7 @@ public: void setStorages(const std::vector & storages); void addStorage(const StoragePtr & new_storage); void removeStorage(const StoragePtr & storage_to_remove); + void removeAllStorages(); std::vector getStorages(); std::vector getStorages() const; std::shared_ptr> getStoragesPtr(); diff --git a/src/Access/ReplicatedAccessStorage.cpp b/src/Access/ReplicatedAccessStorage.cpp index cd9a86a1bd2..ed114327041 100644 --- a/src/Access/ReplicatedAccessStorage.cpp +++ b/src/Access/ReplicatedAccessStorage.cpp @@ -66,6 +66,18 @@ ReplicatedAccessStorage::ReplicatedAccessStorage( } ReplicatedAccessStorage::~ReplicatedAccessStorage() +{ + try + { + ReplicatedAccessStorage::shutdown(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +void ReplicatedAccessStorage::shutdown() { stopWatchingThread(); } diff --git a/src/Access/ReplicatedAccessStorage.h b/src/Access/ReplicatedAccessStorage.h index cddb20860f7..f8518226997 100644 --- a/src/Access/ReplicatedAccessStorage.h +++ b/src/Access/ReplicatedAccessStorage.h @@ -23,6 +23,8 @@ public: ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper, AccessChangesNotifier & changes_notifier_, bool allow_backup); ~ReplicatedAccessStorage() override; + void shutdown() override; + const char * getStorageType() const override { return STORAGE_TYPE; } void startPeriodicReloading() override { startWatchingThread(); } diff --git a/src/Access/SettingsProfilesInfo.cpp b/src/Access/SettingsProfilesInfo.cpp index d8b52ecf5e4..a5eacbe1b6e 100644 --- a/src/Access/SettingsProfilesInfo.cpp +++ b/src/Access/SettingsProfilesInfo.cpp @@ -15,22 +15,8 @@ namespace ErrorCodes bool operator==(const SettingsProfilesInfo & lhs, const SettingsProfilesInfo & rhs) { - if (lhs.settings != rhs.settings) - return false; - - if (lhs.constraints != rhs.constraints) - return false; - - if (lhs.profiles != rhs.profiles) - return false; - - if (lhs.profiles_with_implicit != rhs.profiles_with_implicit) - return false; - - if (lhs.names_of_profiles != rhs.names_of_profiles) - return false; - - return true; + return std::tie(lhs.settings, lhs.constraints, lhs.profiles, lhs.profiles_with_implicit, lhs.names_of_profiles) + == std::tie(rhs.settings, rhs.constraints, rhs.profiles, rhs.profiles_with_implicit, rhs.names_of_profiles); } std::shared_ptr @@ -66,18 +52,20 @@ Strings SettingsProfilesInfo::getProfileNames() const { Strings result; result.reserve(profiles.size()); - for (const auto & profile_id : profiles) + for (const UUID & profile_uuid : profiles) { - const auto p = names_of_profiles.find(profile_id); - if (p != names_of_profiles.end()) - 
result.push_back(p->second); + const auto names_it = names_of_profiles.find(profile_uuid); + if (names_it != names_of_profiles.end()) + { + result.push_back(names_it->second); + } else { - if (const auto name = access_control.tryReadName(profile_id)) + if (const auto name = access_control.tryReadName(profile_uuid)) // We could've updated cache here, but it is a very rare case, so don't bother. result.push_back(*name); else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to get profile name for {}", toString(profile_id)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to get profile name for {}", toString(profile_uuid)); } } diff --git a/src/Access/SettingsProfilesInfo.h b/src/Access/SettingsProfilesInfo.h index ec289a5ec0a..bc1b01f47d0 100644 --- a/src/Access/SettingsProfilesInfo.h +++ b/src/Access/SettingsProfilesInfo.h @@ -29,7 +29,11 @@ struct SettingsProfilesInfo /// Names of all the profiles in `profiles`. std::unordered_map names_of_profiles; - explicit SettingsProfilesInfo(const AccessControl & access_control_) : constraints(access_control_), access_control(access_control_) {} + explicit SettingsProfilesInfo(const AccessControl & access_control_) + : constraints(access_control_), access_control(access_control_) + { + } + std::shared_ptr getConstraintsAndProfileIDs( const std::shared_ptr & previous = nullptr) const; diff --git a/src/Access/User.cpp b/src/Access/User.cpp index 6a296706baf..c02c598ee40 100644 --- a/src/Access/User.cpp +++ b/src/Access/User.cpp @@ -33,6 +33,8 @@ void User::setName(const String & name_) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_); if (name_.starts_with(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_); + if (name_.starts_with(EncodedUserInfo::JWT_AUTHENTICAION_MARKER)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_); name = name_; } diff --git 
a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 1f9a977bab6..a030ae96cbb 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -194,18 +195,23 @@ namespace /// Fill list of allowed certificates. Poco::Util::AbstractConfiguration::Keys keys; config.keys(certificates_config, keys); - boost::container::flat_set common_names; for (const String & key : keys) { if (key.starts_with("common_name")) { String value = config.getString(certificates_config + "." + key); - common_names.insert(std::move(value)); + user->auth_data.addSSLCertificateSubject(SSLCertificateSubjects::Type::CN, std::move(value)); + } + else if (key.starts_with("subject_alt_name")) + { + String value = config.getString(certificates_config + "." + key); + if (value.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected ssl_certificates.subject_alt_name to not be empty"); + user->auth_data.addSSLCertificateSubject(SSLCertificateSubjects::Type::SAN, std::move(value)); } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown certificate pattern type: {}", key); } - user->auth_data.setSSLCertificateCommonNames(std::move(common_names)); } else if (has_ssh_keys) { @@ -880,8 +886,7 @@ void UsersConfigAccessStorage::load( Settings::checkNoSettingNamesAtTopLevel(*new_config, users_config_path); parseFromConfig(*new_config); access_control.getChangesNotifier().sendNotifications(); - }, - /* already_loaded = */ false); + }); } void UsersConfigAccessStorage::startPeriodicReloading() diff --git a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp new file mode 100644 index 00000000000..1c059dc52aa --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp @@ -0,0 +1,283 @@ +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include 
+#include + +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ +struct Settings; + +namespace ErrorCodes +{ + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +struct GroupConcatDataBase +{ + UInt64 data_size = 0; + UInt64 allocated_size = 0; + char * data = nullptr; + + void checkAndUpdateSize(UInt64 add, Arena * arena) + { + if (data_size + add >= allocated_size) + { + auto old_size = allocated_size; + allocated_size = std::max(2 * allocated_size, data_size + add); + data = arena->realloc(data, old_size, allocated_size); + } + } + + void insertChar(const char * str, UInt64 str_size, Arena * arena) + { + checkAndUpdateSize(str_size, arena); + memcpy(data + data_size, str, str_size); + data_size += str_size; + } + + void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena) + { + WriteBufferFromOwnString buff; + serialization->serializeText(*column, row_num, buff, FormatSettings{}); + auto string = buff.stringView(); + insertChar(string.data(), string.size(), arena); + } + +}; + +template +struct GroupConcatData; + +template<> +struct GroupConcatData final : public GroupConcatDataBase +{ +}; + +template<> +struct GroupConcatData final : public GroupConcatDataBase +{ + using Offset = UInt64; + using Allocator = MixedAlignedArenaAllocator; + using Offsets = PODArray; + + /// offset[i * 2] - beginning of the i-th row, offset[i * 2 + 1] - end of the i-th row + Offsets offsets; + UInt64 num_rows = 0; + + UInt64 getSize(size_t i) const { return offsets[i * 2 + 1] - offsets[i * 2]; } + + UInt64 getString(size_t i) const { return offsets[i * 2]; } + + void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena) + { + WriteBufferFromOwnString buff; + serialization->serializeText(*column, row_num, buff, {}); + auto string = buff.stringView(); + + 
checkAndUpdateSize(string.size(), arena); + memcpy(data + data_size, string.data(), string.size()); + offsets.push_back(data_size, arena); + data_size += string.size(); + offsets.push_back(data_size, arena); + num_rows++; + } +}; + +template +class GroupConcatImpl final + : public IAggregateFunctionDataHelper, GroupConcatImpl> +{ + static constexpr auto name = "groupConcat"; + + SerializationPtr serialization; + UInt64 limit; + const String delimiter; + +public: + GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_) + : IAggregateFunctionDataHelper, GroupConcatImpl>( + {data_type_}, parameters_, std::make_shared()) + , serialization(this->argument_types[0]->getDefaultSerialization()) + , limit(limit_) + , delimiter(delimiter_) + { + } + + String getName() const override { return name; } + + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override + { + auto & cur_data = this->data(place); + + if constexpr (has_limit) + if (cur_data.num_rows >= limit) + return; + + if (cur_data.data_size != 0) + cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena); + + cur_data.insert(columns[0], serialization, row_num, arena); + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override + { + auto & cur_data = this->data(place); + auto & rhs_data = this->data(rhs); + + if (rhs_data.data_size == 0) + return; + + if constexpr (has_limit) + { + UInt64 new_elems_count = std::min(rhs_data.num_rows, limit - cur_data.num_rows); + for (UInt64 i = 0; i < new_elems_count; ++i) + { + if (cur_data.data_size != 0) + cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena); + + cur_data.offsets.push_back(cur_data.data_size, arena); + cur_data.insertChar(rhs_data.data + rhs_data.getString(i), rhs_data.getSize(i), arena); + cur_data.num_rows++; + cur_data.offsets.push_back(cur_data.data_size, arena); + } + } + 
else + { + if (cur_data.data_size != 0) + cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena); + + cur_data.insertChar(rhs_data.data, rhs_data.data_size, arena); + } + } + + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override + { + auto & cur_data = this->data(place); + + writeVarUInt(cur_data.data_size, buf); + + buf.write(cur_data.data, cur_data.data_size); + + if constexpr (has_limit) + { + writeVarUInt(cur_data.num_rows, buf); + for (const auto & offset : cur_data.offsets) + writeVarUInt(offset, buf); + } + } + + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override + { + auto & cur_data = this->data(place); + + UInt64 temp_size = 0; + readVarUInt(temp_size, buf); + + cur_data.checkAndUpdateSize(temp_size, arena); + + buf.readStrict(cur_data.data + cur_data.data_size, temp_size); + cur_data.data_size = temp_size; + + if constexpr (has_limit) + { + readVarUInt(cur_data.num_rows, buf); + cur_data.offsets.resize_exact(cur_data.num_rows * 2, arena); + for (auto & offset : cur_data.offsets) + readVarUInt(offset, buf); + } + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override + { + auto & cur_data = this->data(place); + + if (cur_data.data_size == 0) + { + to.insertDefault(); + return; + } + + auto & column_string = assert_cast(to); + column_string.insertData(cur_data.data, cur_data.data_size); + } + + bool allocatesMemoryInArena() const override { return true; } +}; + +AggregateFunctionPtr createAggregateFunctionGroupConcat( + const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) +{ + assertUnary(name, argument_types); + + bool has_limit = false; + UInt64 limit = 0; + String delimiter; + + if (parameters.size() > 2) + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, + "Incorrect number of parameters for 
aggregate function {}, should be 0, 1 or 2, got: {}", name, parameters.size()); + + if (!parameters.empty()) + { + auto type = parameters[0].getType(); + if (type != Field::Types::String) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First parameter for aggregate function {} should be string", name); + + delimiter = parameters[0].get(); + } + if (parameters.size() == 2) + { + auto type = parameters[1].getType(); + + if (type != Field::Types::Int64 && type != Field::Types::UInt64) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number", name); + + if ((type == Field::Types::Int64 && parameters[1].get() <= 0) || + (type == Field::Types::UInt64 && parameters[1].get() == 0)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].get()); + + has_limit = true; + limit = parameters[1].get(); + } + + if (has_limit) + return std::make_shared>(argument_types[0], parameters, limit, delimiter); + else + return std::make_shared>(argument_types[0], parameters, limit, delimiter); +} + +} + +void registerAggregateFunctionGroupConcat(AggregateFunctionFactory & factory) +{ + AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; + + factory.registerFunction("groupConcat", { createAggregateFunctionGroupConcat, properties }); + factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::CaseInsensitive); +} + +} diff --git a/src/AggregateFunctions/AggregateFunctionMaxIntersections.cpp b/src/AggregateFunctions/AggregateFunctionMaxIntersections.cpp index 05ed85a9004..6c26065a918 100644 --- a/src/AggregateFunctions/AggregateFunctionMaxIntersections.cpp +++ b/src/AggregateFunctions/AggregateFunctionMaxIntersections.cpp @@ -91,7 +91,8 @@ public: return std::make_shared>(); } - bool allocatesMemoryInArena() const override { return false; } + /// 
MaxIntersectionsData::Allocator uses the arena + bool allocatesMemoryInArena() const override { return true; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionDistinct.h b/src/AggregateFunctions/Combinators/AggregateFunctionDistinct.h index 4338dcff5c0..f532858b3d8 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionDistinct.h @@ -228,6 +228,11 @@ public: return prefix_size + nested_func->sizeOfData(); } + size_t alignOfData() const override + { + return std::max(alignof(Data), nested_func->alignOfData()); + } + void create(AggregateDataPtr __restrict place) const override { new (place) Data; diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index d5a4f6b576a..4207ea587b1 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -334,6 +334,18 @@ public: compress(); // Allows reading/writing TDigests with different epsilon/max_centroids params } + Float64 getCountEqual(Float64 value) const + { + Float64 result = 0; + for (const auto & c : centroids) + { + /// std::cerr << "c "<< c.mean << " "<< c.count << std::endl; + if (value == c.mean) + result += c.count; + } + return result; + } + Float64 getCountLessThan(Float64 value) const { bool first = true; diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index 58e657d3723..4ac25e14ee6 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -19,6 +19,7 @@ void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factor void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &); void 
registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &); void registerAggregateFunctionGroupArrayIntersect(AggregateFunctionFactory &); +void registerAggregateFunctionGroupConcat(AggregateFunctionFactory &); void registerAggregateFunctionsQuantile(AggregateFunctionFactory &); void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory &); void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory &); @@ -120,6 +121,7 @@ void registerAggregateFunctions() registerAggregateFunctionGroupUniqArray(factory); registerAggregateFunctionGroupArrayInsertAt(factory); registerAggregateFunctionGroupArrayIntersect(factory); + registerAggregateFunctionGroupConcat(factory); registerAggregateFunctionsQuantile(factory); registerAggregateFunctionsQuantileDeterministic(factory); registerAggregateFunctionsQuantileExact(factory); diff --git a/src/Analyzer/ArrayJoinNode.cpp b/src/Analyzer/ArrayJoinNode.cpp index 27d7229d46a..0cfb5d80b2a 100644 --- a/src/Analyzer/ArrayJoinNode.cpp +++ b/src/Analyzer/ArrayJoinNode.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -64,7 +65,12 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const auto * column_node = array_join_expression->as(); if (column_node && column_node->getExpression()) - array_join_expression_ast = column_node->getExpression()->toAST(options); + { + if (const auto * function_node = column_node->getExpression()->as(); function_node && function_node->getFunctionName() == "nested") + array_join_expression_ast = array_join_expression->toAST(options); + else + array_join_expression_ast = column_node->getExpression()->toAST(options); + } else array_join_expression_ast = array_join_expression->toAST(options); diff --git a/src/Analyzer/FunctionNode.cpp b/src/Analyzer/FunctionNode.cpp index f13842cf67c..e98b04fe9a9 100644 --- a/src/Analyzer/FunctionNode.cpp +++ b/src/Analyzer/FunctionNode.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include 
@@ -58,12 +60,20 @@ ColumnsWithTypeAndName FunctionNode::getArgumentColumns() const ColumnWithTypeAndName argument_column; + auto * constant = argument->as(); if (isNameOfInFunction(function_name) && i == 1) + { argument_column.type = std::make_shared(); + if (constant) + { + /// Created but not filled for the analysis during function resolution. + FutureSetPtr empty_set; + argument_column.column = ColumnConst::create(ColumnSet::create(1, empty_set), 1); + } + } else argument_column.type = argument->getResultType(); - auto * constant = argument->as(); if (constant && !isNotCreatable(argument_column.type)) argument_column.column = argument_column.type->createColumnConst(1, constant->getValue()); diff --git a/src/Analyzer/Identifier.h b/src/Analyzer/Identifier.h index cbd8f5e7694..91190dc7cdb 100644 --- a/src/Analyzer/Identifier.h +++ b/src/Analyzer/Identifier.h @@ -406,7 +406,7 @@ struct fmt::formatter } template - auto format(const DB::Identifier & identifier, FormatContext & ctx) + auto format(const DB::Identifier & identifier, FormatContext & ctx) const { return fmt::format_to(ctx.out(), "{}", identifier.getFullName()); } @@ -428,7 +428,7 @@ struct fmt::formatter } template - auto format(const DB::IdentifierView & identifier_view, FormatContext & ctx) + auto format(const DB::IdentifierView & identifier_view, FormatContext & ctx) const { return fmt::format_to(ctx.out(), "{}", identifier_view.getFullName()); } diff --git a/src/Analyzer/InterpolateNode.cpp b/src/Analyzer/InterpolateNode.cpp index e4f7e22b803..97dc79f565b 100644 --- a/src/Analyzer/InterpolateNode.cpp +++ b/src/Analyzer/InterpolateNode.cpp @@ -10,9 +10,12 @@ namespace DB { -InterpolateNode::InterpolateNode(QueryTreeNodePtr expression_, QueryTreeNodePtr interpolate_expression_) +InterpolateNode::InterpolateNode(std::shared_ptr expression_, QueryTreeNodePtr interpolate_expression_) : IQueryTreeNode(children_size) { + if (expression_) + expression_name = expression_->getIdentifier().getFullName(); + 
children[expression_child_index] = std::move(expression_); children[interpolate_expression_child_index] = std::move(interpolate_expression_); } @@ -41,13 +44,23 @@ void InterpolateNode::updateTreeHashImpl(HashState &, CompareOptions) const QueryTreeNodePtr InterpolateNode::cloneImpl() const { - return std::make_shared(nullptr /*expression*/, nullptr /*interpolate_expression*/); + auto cloned = std::make_shared(nullptr /*expression*/, nullptr /*interpolate_expression*/); + cloned->expression_name = expression_name; + return cloned; } ASTPtr InterpolateNode::toASTImpl(const ConvertToASTOptions & options) const { auto result = std::make_shared(); - result->column = getExpression()->toAST(options)->getColumnName(); + + /// Interpolate parser supports only identifier node. + /// In case of alias, identifier is replaced to expression, which can't be parsed. + /// In this case, keep original alias name. + if (const auto * identifier = getExpression()->as()) + result->column = identifier->toAST(options)->getColumnName(); + else + result->column = expression_name; + result->children.push_back(getInterpolateExpression()->toAST(options)); result->expr = result->children.back(); diff --git a/src/Analyzer/InterpolateNode.h b/src/Analyzer/InterpolateNode.h index 9269d3924f5..ec493ed8bdd 100644 --- a/src/Analyzer/InterpolateNode.h +++ b/src/Analyzer/InterpolateNode.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include namespace DB @@ -19,7 +19,7 @@ class InterpolateNode final : public IQueryTreeNode { public: /// Initialize interpolate node with expression and interpolate expression - explicit InterpolateNode(QueryTreeNodePtr expression_, QueryTreeNodePtr interpolate_expression_); + explicit InterpolateNode(std::shared_ptr expression_, QueryTreeNodePtr interpolate_expression_); /// Get expression to interpolate const QueryTreeNodePtr & getExpression() const @@ -61,6 +61,9 @@ protected: ASTPtr toASTImpl(const ConvertToASTOptions & options) const override; + /// Initial name 
from column identifier. + std::string expression_name; + private: static constexpr size_t expression_child_index = 0; static constexpr size_t interpolate_expression_child_index = 1; diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index 9153bc4eca2..e6798a792dd 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -51,7 +51,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void leaveImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_arithmetic_operations_in_aggregate_functions) return; diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 11811ae4f2d..ac221bd66e7 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -551,14 +551,25 @@ private: in_function->getArguments().getNodes() = std::move(in_arguments); in_function->resolveAsFunction(in_function_resolver); + + DataTypePtr result_type = in_function->getResultType(); + const auto * type_low_cardinality = typeid_cast(result_type.get()); + if (type_low_cardinality) + result_type = type_low_cardinality->getDictionaryType(); /** For `k :: UInt8`, expression `k = 1 OR k = NULL` with result type Nullable(UInt8) * is replaced with `k IN (1, NULL)` with result type UInt8. * Convert it back to Nullable(UInt8). + * And for `k :: LowCardinality(UInt8)`, the transformation of `k IN (1, NULL)` results in type LowCardinality(UInt8). + * Convert it to LowCardinality(Nullable(UInt8)). 
*/ - if (is_any_nullable && !in_function->getResultType()->isNullable()) + if (is_any_nullable && !result_type->isNullable()) { - auto nullable_result_type = std::make_shared(in_function->getResultType()); - auto in_function_nullable = createCastFunction(std::move(in_function), std::move(nullable_result_type), getContext()); + DataTypePtr new_result_type = std::make_shared(result_type); + if (type_low_cardinality) + { + new_result_type = std::make_shared(new_result_type); + } + auto in_function_nullable = createCastFunction(std::move(in_function), std::move(new_result_type), getContext()); or_operands.push_back(std::move(in_function_nullable)); } else diff --git a/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp index 26ca5984b49..15919c4a2fe 100644 --- a/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp +++ b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp @@ -22,6 +22,7 @@ public: if (query_node->hasOrderBy()) { + QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set; QueryTreeNodes result_nodes; auto & query_order_by_nodes = query_node->getOrderBy().getNodes(); @@ -45,10 +46,9 @@ public: query_order_by_nodes = std::move(result_nodes); } - unique_expressions_nodes_set.clear(); - if (query_node->hasLimitBy()) { + QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set; QueryTreeNodes result_nodes; auto & query_limit_by_nodes = query_node->getLimitBy().getNodes(); @@ -63,9 +63,6 @@ public: query_limit_by_nodes = std::move(result_nodes); } } - -private: - QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set; }; } diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index d087fe1c7b9..91186db0e0c 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -41,49 
+41,58 @@ public: return; bool replaced_argument = false; - auto & uniq_function_arguments_nodes = function_node->getArguments().getNodes(); + auto replaced_uniq_function_arguments_nodes = function_node->getArguments().getNodes(); - for (auto & uniq_function_argument_node : uniq_function_arguments_nodes) + /// Replace injective function with its single argument + auto remove_injective_function = [&replaced_argument](QueryTreeNodePtr & arg) -> bool { - auto * uniq_function_argument_node_typed = uniq_function_argument_node->as(); - if (!uniq_function_argument_node_typed || !uniq_function_argument_node_typed->isOrdinaryFunction()) - continue; - - auto & uniq_function_argument_node_argument_nodes = uniq_function_argument_node_typed->getArguments().getNodes(); + auto * arg_typed = arg->as(); + if (!arg_typed || !arg_typed->isOrdinaryFunction()) + return false; /// Do not apply optimization if injective function contains multiple arguments - if (uniq_function_argument_node_argument_nodes.size() != 1) - continue; + auto & arg_arguments_nodes = arg_typed->getArguments().getNodes(); + if (arg_arguments_nodes.size() != 1) + return false; - const auto & uniq_function_argument_node_function = uniq_function_argument_node_typed->getFunction(); - if (!uniq_function_argument_node_function->isInjective({})) - continue; + const auto & arg_function = arg_typed->getFunction(); + if (!arg_function->isInjective({})) + return false; - /// Replace injective function with its single argument - uniq_function_argument_node = uniq_function_argument_node_argument_nodes[0]; - replaced_argument = true; + arg = arg_arguments_nodes[0]; + return replaced_argument = true; + }; + + for (auto & uniq_function_argument_node : replaced_uniq_function_arguments_nodes) + { + while (remove_injective_function(uniq_function_argument_node)) + ; } if (!replaced_argument) return; - const auto & function_node_argument_nodes = function_node->getArguments().getNodes(); + DataTypes replaced_argument_types; + 
replaced_argument_types.reserve(replaced_uniq_function_arguments_nodes.size()); - DataTypes argument_types; - argument_types.reserve(function_node_argument_nodes.size()); - - for (const auto & function_node_argument : function_node_argument_nodes) - argument_types.emplace_back(function_node_argument->getResultType()); + for (const auto & function_node_argument : replaced_uniq_function_arguments_nodes) + replaced_argument_types.emplace_back(function_node_argument->getResultType()); + auto current_aggregate_function = function_node->getAggregateFunction(); AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( + auto replaced_aggregate_function = AggregateFunctionFactory::instance().get( function_node->getFunctionName(), NullsAction::EMPTY, - argument_types, - function_node->getAggregateFunction()->getParameters(), + replaced_argument_types, + current_aggregate_function->getParameters(), properties); - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + /// uniqCombined returns nullable with nullable arguments so the result type might change which breaks the pass + if (!replaced_aggregate_function->getResultType()->equals(*current_aggregate_function->getResultType())) + return; + + function_node->getArguments().getNodes() = std::move(replaced_uniq_function_arguments_nodes); + function_node->resolveAsAggregateFunction(std::move(replaced_aggregate_function)); } }; diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 5e5ecaaa93a..576c4943ccb 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -1,3 +1,5 @@ +#include + #include #include #include @@ -985,18 +987,18 @@ std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded( { result_aggregate_function_name = settings.count_distinct_implementation; } - else if (aggregate_function_name_lowercase == "countdistinctif" || 
aggregate_function_name_lowercase == "countifdistinct") + else if (aggregate_function_name_lowercase == "countifdistinct" || + (settings.rewrite_count_distinct_if_with_count_distinct_implementation && aggregate_function_name_lowercase == "countdistinctif")) { result_aggregate_function_name = settings.count_distinct_implementation; result_aggregate_function_name += "If"; } - - /// Replace aggregateFunctionIfDistinct into aggregateFunctionDistinctIf to make execution more optimal - if (result_aggregate_function_name.ends_with("ifdistinct")) + else if (aggregate_function_name_lowercase.ends_with("ifdistinct")) { + /// Replace aggregateFunctionIfDistinct into aggregateFunctionDistinctIf to make execution more optimal size_t prefix_length = result_aggregate_function_name.size() - strlen("ifdistinct"); result_aggregate_function_name = result_aggregate_function_name.substr(0, prefix_length) + "DistinctIf"; - } + } bool need_add_or_null = settings.aggregate_functions_null_for_empty && !result_aggregate_function_name.ends_with("OrNull"); if (need_add_or_null) @@ -3495,7 +3497,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi * * 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process. 
*/ -ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias) +ProjectionNames QueryAnalyzer::resolveExpressionNode( + QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias) { checkStackSize(); @@ -4505,7 +4508,36 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, table_name = table_identifier[1]; } - auto parametrized_view_storage = scope_context->getQueryContext()->buildParametrizedViewStorage(function_ast, database_name, table_name); + /// Collect parametrized view arguments + NameToNameMap view_params; + for (const auto & argument : table_function_node_typed.getArguments()) + { + if (auto * arg_func = argument->as()) + { + if (arg_func->getFunctionName() != "equals") + continue; + + auto nodes = arg_func->getArguments().getNodes(); + if (nodes.size() != 2) + continue; + + if (auto * identifier_node = nodes[0]->as()) + { + resolveExpressionNode(nodes[1], scope, /* allow_lambda_expression */false, /* allow_table_function */false); + if (auto * constant = nodes[1]->as()) + { + view_params[identifier_node->getIdentifier().getFullName()] = convertFieldToString(constant->getValue()); + } + } + } + } + + auto context = scope_context->getQueryContext(); + auto parametrized_view_storage = context->buildParametrizedViewStorage( + database_name, + table_name, + view_params); + if (parametrized_view_storage) { auto fake_table_node = std::make_shared(parametrized_view_storage, scope_context); diff --git a/src/Analyzer/Resolve/ScopeAliases.h b/src/Analyzer/Resolve/ScopeAliases.h index baab843988b..830ae72144b 100644 --- a/src/Analyzer/Resolve/ScopeAliases.h +++ b/src/Analyzer/Resolve/ScopeAliases.h @@ -75,7 +75,12 @@ struct ScopeAliases if (jt == transitive_aliases.end()) return {}; - key = &(getKey(jt->second, find_option)); + const 
auto & new_key = getKey(jt->second, find_option); + /// Ignore potential cyclic aliases. + if (new_key == *key) + return {}; + + key = &new_key; it = alias_map.find(*key); } diff --git a/src/Analyzer/SetUtils.cpp b/src/Analyzer/SetUtils.cpp index ceda264b5a6..0ecb3545225 100644 --- a/src/Analyzer/SetUtils.cpp +++ b/src/Analyzer/SetUtils.cpp @@ -9,6 +9,8 @@ #include #include +#include + namespace DB { @@ -54,8 +56,9 @@ size_t getCompoundTypeDepth(const IDataType & type) } template -Block createBlockFromCollection(const Collection & collection, const DataTypes & block_types, bool transform_null_in) +Block createBlockFromCollection(const Collection & collection, const DataTypes& value_types, const DataTypes & block_types, bool transform_null_in) { + assert(collection.size() == value_types.size()); size_t columns_size = block_types.size(); MutableColumns columns(columns_size); for (size_t i = 0; i < columns_size; ++i) @@ -66,13 +69,17 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes & Row tuple_values; - for (const auto & value : collection) + for (size_t collection_index = 0; collection_index < collection.size(); ++collection_index) { + const auto & value = collection[collection_index]; if (columns_size == 1) { - auto field = convertFieldToTypeStrict(value, *block_types[0]); + const DataTypePtr & data_type = value_types[collection_index]; + auto field = convertFieldToTypeStrict(value, *data_type, *block_types[0]); if (!field) + { continue; + } bool need_insert_null = transform_null_in && block_types[0]->isNullable(); if (!field->isNull() || need_insert_null) @@ -87,6 +94,9 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes & value.getTypeName()); const auto & tuple = value.template get(); + const DataTypePtr & value_type = value_types[collection_index]; + const DataTypes & tuple_value_type = typeid_cast(value_type.get())->getElements(); + size_t tuple_size = tuple.size(); if (tuple_size != columns_size) 
@@ -101,7 +111,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes & size_t i = 0; for (; i < tuple_size; ++i) { - auto converted_field = convertFieldToTypeStrict(tuple[i], *block_types[i]); + auto converted_field = convertFieldToTypeStrict(tuple[i], *tuple_value_type[i], *block_types[i]); if (!converted_field) break; tuple_values[i] = std::move(*converted_field); @@ -147,20 +157,28 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const if (lhs_type_depth == rhs_type_depth) { /// 1 in 1; (1, 2) in (1, 2); identity(tuple(tuple(tuple(1)))) in tuple(tuple(tuple(1))); etc. - Array array{value}; - result_block = createBlockFromCollection(array, set_element_types, transform_null_in); + DataTypes value_types{value_type}; + result_block = createBlockFromCollection(array, value_types, set_element_types, transform_null_in); } else if (lhs_type_depth + 1 == rhs_type_depth) { /// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)) - WhichDataType rhs_which_type(value_type); if (rhs_which_type.isArray()) - result_block = createBlockFromCollection(value.get(), set_element_types, transform_null_in); + { + const DataTypeArray * value_array_type = assert_cast(value_type.get()); + size_t value_array_size = value.get().size(); + DataTypes value_types(value_array_size, value_array_type->getNestedType()); + result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); + } else if (rhs_which_type.isTuple()) - result_block = createBlockFromCollection(value.get(), set_element_types, transform_null_in); + { + const DataTypeTuple * value_tuple_type = assert_cast(value_type.get()); + const DataTypes & value_types = value_tuple_type->getElements(); + result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); + } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unsupported type at the right-side of IN. Expected Array or Tuple. 
Actual {}", diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 92f086295a0..56544312c26 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -88,14 +88,10 @@ namespace std::move(headers), S3::CredentialsConfiguration { - settings.auth_settings.use_environment_credentials.value_or( - context->getConfigRef().getBool("s3.use_environment_credentials", true)), - settings.auth_settings.use_insecure_imds_request.value_or( - context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), - settings.auth_settings.expiration_window_seconds.value_or( - context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), - settings.auth_settings.no_sign_request.value_or( - context->getConfigRef().getBool("s3.no_sign_request", false)), + settings.auth_settings.use_environment_credentials, + settings.auth_settings.use_insecure_imds_request, + settings.auth_settings.expiration_window_seconds, + settings.auth_settings.no_sign_request }); } @@ -131,12 +127,18 @@ BackupReaderS3::BackupReaderS3( : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup).value_or(S3Settings{})) { - auto & request_settings = s3_settings.request_settings; - request_settings.updateFromSettingsIfChanged(context_->getSettingsRef()); - request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint - request_settings.allow_native_copy = allow_s3_native_copy; + s3_settings.loadFromConfig(context_->getConfigRef(), "s3", context_->getSettingsRef()); + + if (auto endpoint_settings = 
context_->getStorageS3Settings().getSettings( + s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup)) + { + s3_settings.updateIfChanged(*endpoint_settings); + } + + s3_settings.request_settings.updateFromSettings(context_->getSettingsRef(), /* if_changed */true); + s3_settings.request_settings.allow_native_copy = allow_s3_native_copy; + client = makeS3Client(s3_uri_, access_key_id_, secret_access_key_, s3_settings, context_); if (auto blob_storage_system_log = context_->getBlobStorageLog()) @@ -223,13 +225,19 @@ BackupWriterS3::BackupWriterS3( : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup).value_or(S3Settings{})) { - auto & request_settings = s3_settings.request_settings; - request_settings.updateFromSettingsIfChanged(context_->getSettingsRef()); - request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint - request_settings.allow_native_copy = allow_s3_native_copy; - request_settings.setStorageClassName(storage_class_name); + s3_settings.loadFromConfig(context_->getConfigRef(), "s3", context_->getSettingsRef()); + + if (auto endpoint_settings = context_->getStorageS3Settings().getSettings( + s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup)) + { + s3_settings.updateIfChanged(*endpoint_settings); + } + + s3_settings.request_settings.updateFromSettings(context_->getSettingsRef(), /* if_changed */true); + s3_settings.request_settings.allow_native_copy = allow_s3_native_copy; + s3_settings.request_settings.storage_class_name = storage_class_name; + client = makeS3Client(s3_uri_, 
access_key_id_, secret_access_key_, s3_settings, context_); if (auto blob_storage_system_log = context_->getBlobStorageLog()) { diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index f81eb975df3..327f06363c5 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 8f32c918c61..3f972c36e47 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -24,8 +24,6 @@ #include #include -#include - namespace ProfileEvents { @@ -93,6 +91,7 @@ BackupImpl::BackupImpl( const std::optional & base_backup_info_, std::shared_ptr reader_, const ContextPtr & context_, + bool is_internal_backup_, bool use_same_s3_credentials_for_base_backup_) : backup_info(backup_info_) , backup_name_for_logging(backup_info.toStringForLogging()) @@ -101,7 +100,7 @@ BackupImpl::BackupImpl( , open_mode(OpenMode::READ) , reader(std::move(reader_)) , context(context_) - , is_internal_backup(false) + , is_internal_backup(is_internal_backup_) , version(INITIAL_BACKUP_VERSION) , base_backup_info(base_backup_info_) , use_same_s3_credentials_for_base_backup(use_same_s3_credentials_for_base_backup_) @@ -256,6 +255,7 @@ std::shared_ptr BackupImpl::getBaseBackupUnlocked() const params.backup_info = *base_backup_info; params.open_mode = OpenMode::READ; params.context = context; + params.is_internal_backup = is_internal_backup; /// use_same_s3_credentials_for_base_backup should be inherited for base backups params.use_same_s3_credentials_for_base_backup = use_same_s3_credentials_for_base_backup; diff --git a/src/Backups/BackupImpl.h b/src/Backups/BackupImpl.h index 6fed5fe758b..2b27e2ab090 100644 --- a/src/Backups/BackupImpl.h +++ b/src/Backups/BackupImpl.h @@ -40,6 +40,7 @@ public: const std::optional & base_backup_info_, std::shared_ptr reader_, const ContextPtr & context_, + bool is_internal_backup_, bool 
use_same_s3_credentials_for_base_backup_); BackupImpl( diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 1a3fdf58cc4..454a0468e9f 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -438,7 +438,7 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_ String create_table_query_str = serializeAST(*create_table_query); bool is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(StorageID{table_name.database, table_name.table}); - auto table_dependencies = getDependenciesFromCreateQuery(context, table_name, create_table_query); + auto table_dependencies = getDependenciesFromCreateQuery(context, table_name, create_table_query, context->getCurrentDatabase()); bool table_has_data = backup->hasFiles(data_path_in_backup); std::lock_guard lock{mutex}; diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 81e3c104da1..03d156d1009 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -153,6 +153,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) params.base_backup_info, reader, params.context, + params.is_internal_backup, /* use_same_s3_credentials_for_base_backup*/ false); } else diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index c34dbe273f5..59ed9506af0 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -119,6 +119,7 @@ void registerBackupEngineS3(BackupFactory & factory) params.base_backup_info, reader, params.context, + params.is_internal_backup, params.use_same_s3_credentials_for_base_backup); } else diff --git a/src/Backups/registerBackupEnginesFileAndDisk.cpp b/src/Backups/registerBackupEnginesFileAndDisk.cpp index c633ebb6a5a..35263d39cba 100644 --- 
a/src/Backups/registerBackupEnginesFileAndDisk.cpp +++ b/src/Backups/registerBackupEnginesFileAndDisk.cpp @@ -177,6 +177,7 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory) params.base_backup_info, reader, params.context, + params.is_internal_backup, params.use_same_s3_credentials_for_base_backup); } else diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 290a7311448..b18207e55ad 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -210,6 +210,7 @@ add_object_library(clickhouse_analyzer_passes Analyzer/Resolve) add_object_library(clickhouse_planner Planner) add_object_library(clickhouse_interpreters Interpreters) add_object_library(clickhouse_interpreters_cache Interpreters/Cache) +add_object_library(clickhouse_interpreters_hash_join Interpreters/HashJoin) add_object_library(clickhouse_interpreters_access Interpreters/Access) add_object_library(clickhouse_interpreters_mysql Interpreters/MySQL) add_object_library(clickhouse_interpreters_clusterproxy Interpreters/ClusterProxy) @@ -222,7 +223,7 @@ add_object_library(clickhouse_storages_mergetree Storages/MergeTree) add_object_library(clickhouse_storages_statistics Storages/Statistics) add_object_library(clickhouse_storages_liveview Storages/LiveView) add_object_library(clickhouse_storages_windowview Storages/WindowView) -add_object_library(clickhouse_storages_s3queue Storages/S3Queue) +add_object_library(clickhouse_storages_s3queue Storages/ObjectStorageQueue) add_object_library(clickhouse_storages_materializedview Storages/MaterializedView) add_object_library(clickhouse_client Client) add_object_library(clickhouse_bridge BridgeHelper) @@ -285,7 +286,7 @@ if (TARGET ch_contrib::llvm) endif () if (TARGET ch_contrib::gwp_asan) - target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::gwp_asan) + target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::gwp_asan) target_link_libraries (clickhouse_new_delete PRIVATE ch_contrib::gwp_asan) endif() diff --git 
a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f8391c64d5a..5d472ba99b9 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -44,13 +44,12 @@ #include #include #include -#include #include +#include #include #include #include -#include #include #include #include @@ -110,6 +109,7 @@ namespace ErrorCodes extern const int USER_SESSION_LIMIT_EXCEEDED; extern const int NOT_IMPLEMENTED; extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; + extern const int USER_EXPIRED; } } @@ -302,8 +302,29 @@ public: ClientBase::~ClientBase() = default; -ClientBase::ClientBase() = default; - +ClientBase::ClientBase( + int in_fd_, + int out_fd_, + int err_fd_, + std::istream & input_stream_, + std::ostream & output_stream_, + std::ostream & error_stream_ +) + : std_in(in_fd_) + , std_out(out_fd_) + , progress_indication(output_stream_, in_fd_, err_fd_) + , in_fd(in_fd_) + , out_fd(out_fd_) + , err_fd(err_fd_) + , input_stream(input_stream_) + , output_stream(output_stream_) + , error_stream(error_stream_) +{ + stdin_is_a_tty = isatty(in_fd); + stdout_is_a_tty = isatty(out_fd); + stderr_is_a_tty = isatty(err_fd); + terminal_width = getTerminalWidth(in_fd, err_fd); +} void ClientBase::setupSignalHandler() { @@ -330,7 +351,7 @@ void ClientBase::setupSignalHandler() } -ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements, bool is_interactive, bool ignore_error) +ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements) { std::unique_ptr parser; ASTPtr res; @@ -359,7 +380,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Setting if (!res) { - std::cerr << std::endl << message << std::endl << std::endl; + error_stream << std::endl << message << std::endl << std::endl; return nullptr; } } @@ -373,11 +394,11 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Setting if 
(is_interactive) { - std::cout << std::endl; - WriteBufferFromOStream res_buf(std::cout, 4096); + output_stream << std::endl; + WriteBufferFromOStream res_buf(output_stream, 4096); formatAST(*res, res_buf); res_buf.finalize(); - std::cout << std::endl << std::endl; + output_stream << std::endl << std::endl; } return res; @@ -481,7 +502,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query) if (need_render_progress && tty_buf) { if (select_into_file && !select_into_file_and_stdout) - std::cerr << "\r"; + error_stream << "\r"; progress_indication.writeProgress(*tty_buf); } } @@ -741,17 +762,17 @@ bool ClientBase::isRegularFile(int fd) void ClientBase::setDefaultFormatsAndCompressionFromConfiguration() { - if (config().has("output-format")) + if (getClientConfiguration().has("output-format")) { - default_output_format = config().getString("output-format"); + default_output_format = getClientConfiguration().getString("output-format"); is_default_format = false; } - else if (config().has("format")) + else if (getClientConfiguration().has("format")) { - default_output_format = config().getString("format"); + default_output_format = getClientConfiguration().getString("format"); is_default_format = false; } - else if (config().has("vertical")) + else if (getClientConfiguration().has("vertical")) { default_output_format = "Vertical"; is_default_format = false; @@ -777,17 +798,17 @@ void ClientBase::setDefaultFormatsAndCompressionFromConfiguration() default_output_format = "TSV"; } - if (config().has("input-format")) + if (getClientConfiguration().has("input-format")) { - default_input_format = config().getString("input-format"); + default_input_format = getClientConfiguration().getString("input-format"); } - else if (config().has("format")) + else if (getClientConfiguration().has("format")) { - default_input_format = config().getString("format"); + default_input_format = getClientConfiguration().getString("format"); } - else if (config().getString("table-file", 
"-") != "-") + else if (getClientConfiguration().getString("table-file", "-") != "-") { - auto file_name = config().getString("table-file"); + auto file_name = getClientConfiguration().getString("table-file"); std::optional format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(file_name); if (format_from_file_name) default_input_format = *format_from_file_name; @@ -803,7 +824,7 @@ void ClientBase::setDefaultFormatsAndCompressionFromConfiguration() default_input_format = "TSV"; } - format_max_block_size = config().getUInt64("format_max_block_size", + format_max_block_size = getClientConfiguration().getUInt64("format_max_block_size", global_context->getSettingsRef().max_block_size); /// Setting value from cmd arg overrides one from config @@ -813,7 +834,7 @@ void ClientBase::setDefaultFormatsAndCompressionFromConfiguration() } else { - insert_format_max_block_size = config().getUInt64("insert_format_max_block_size", + insert_format_max_block_size = getClientConfiguration().getUInt64("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size); } } @@ -924,9 +945,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) const char * begin = full_query.data(); auto parsed_query = parseQuery(begin, begin + full_query.size(), global_context->getSettingsRef(), - /*allow_multi_statements=*/ false, - is_interactive, - ignore_error); + /*allow_multi_statements=*/ false); if (!parsed_query) return; @@ -1100,7 +1119,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa /// has been received yet. 
if (processed_rows == 0 && e.code() == ErrorCodes::DEADLOCK_AVOIDED && --retries_left) { - std::cerr << "Got a transient error from the server, will" + error_stream << "Got a transient error from the server, will" << " retry (" << retries_left << " retries left)"; } else @@ -1154,7 +1173,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b double elapsed = receive_watch.elapsedSeconds(); if (break_on_timeout && elapsed > receive_timeout.totalSeconds()) { - std::cout << "Timeout exceeded while receiving data from server." + output_stream << "Timeout exceeded while receiving data from server." << " Waited for " << static_cast(elapsed) << " seconds," << " timeout is " << receive_timeout.totalSeconds() << " seconds." << std::endl; @@ -1187,8 +1206,8 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b if (local_format_error) std::rethrow_exception(local_format_error); - if (cancelled && is_interactive) - std::cout << "Query was cancelled." << std::endl; + if (cancelled && is_interactive && !cancelled_printed.exchange(true)) + output_stream << "Query was cancelled." << std::endl; } @@ -1302,8 +1321,13 @@ void ClientBase::onEndOfStream() resetOutput(); - if (is_interactive && !written_first_block) - std::cout << "Ok." << std::endl; + if (is_interactive) + { + if (cancelled && !cancelled_printed.exchange(true)) + output_stream << "Query was cancelled." << std::endl; + else if (!written_first_block) + output_stream << "Ok." << std::endl; + } } @@ -1855,7 +1879,7 @@ void ClientBase::cancelQuery() progress_indication.clearProgressOutput(*tty_buf); if (is_interactive) - std::cout << "Cancelling query." << std::endl; + output_stream << "Cancelling query." 
<< std::endl; cancelled = true; } @@ -1866,6 +1890,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin resetOutput(); have_error = false; cancelled = false; + cancelled_printed = false; client_exception.reset(); server_exception.reset(); @@ -2017,7 +2042,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin { const String & new_database = use_query->getDatabase(); /// If the client initiates the reconnection, it takes the settings from the config. - config().setString("database", new_database); + getClientConfiguration().setString("database", new_database); /// If the connection initiates the reconnection, it uses its variable. connection->setDefaultDatabase(new_database); } @@ -2037,21 +2062,21 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (is_interactive) { - std::cout << std::endl; + output_stream << std::endl; if (!server_exception || processed_rows != 0) - std::cout << processed_rows << " row" << (processed_rows == 1 ? "" : "s") << " in set. "; - std::cout << "Elapsed: " << progress_indication.elapsedSeconds() << " sec. "; + output_stream << processed_rows << " row" << (processed_rows == 1 ? "" : "s") << " in set. "; + output_stream << "Elapsed: " << progress_indication.elapsedSeconds() << " sec. 
"; progress_indication.writeFinalProgress(); - std::cout << std::endl << std::endl; + output_stream << std::endl << std::endl; } - else if (print_time_to_stderr) + else if (getClientConfiguration().getBool("print-time-to-stderr", false)) { - std::cerr << progress_indication.elapsedSeconds() << "\n"; + error_stream << progress_indication.elapsedSeconds() << "\n"; } - if (!is_interactive && print_num_processed_rows) + if (!is_interactive && getClientConfiguration().getBool("print-num-processed-rows", false)) { - std::cout << "Processed rows: " << processed_rows << "\n"; + output_stream << "Processed rows: " << processed_rows << "\n"; } if (have_error && report_error) @@ -2101,9 +2126,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( { parsed_query = parseQuery(this_query_end, all_queries_end, global_context->getSettingsRef(), - /*allow_multi_statements=*/ true, - is_interactive, - ignore_error); + /*allow_multi_statements=*/ true); } catch (const Exception & e) { @@ -2262,7 +2285,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) catch (...) { // Surprisingly, this is a client error. A server error would - // have been reported without throwing (see onReceiveSeverException()). + // have been reported without throwing (see onReceiveExceptionFromServer()). client_exception = std::make_unique(getCurrentExceptionMessageAndPattern(print_stack_trace), getCurrentExceptionCode()); have_error = true; } @@ -2419,12 +2442,12 @@ void ClientBase::initQueryIdFormats() return; /// Initialize query_id_formats if any - if (config().has("query_id_formats")) + if (getClientConfiguration().has("query_id_formats")) { Poco::Util::AbstractConfiguration::Keys keys; - config().keys("query_id_formats", keys); + getClientConfiguration().keys("query_id_formats", keys); for (const auto & name : keys) - query_id_formats.emplace_back(name + ":", config().getString("query_id_formats." 
+ name)); + query_id_formats.emplace_back(name + ":", getClientConfiguration().getString("query_id_formats." + name)); } if (query_id_formats.empty()) @@ -2469,9 +2492,9 @@ bool ClientBase::addMergeTreeSettings(ASTCreateQuery & ast_create) void ClientBase::runInteractive() { - if (config().has("query_id")) + if (getClientConfiguration().has("query_id")) throw Exception(ErrorCodes::BAD_ARGUMENTS, "query_id could be specified only in non-interactive mode"); - if (print_time_to_stderr) + if (getClientConfiguration().getBool("print-time-to-stderr", false)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "time option could be specified only in non-interactive mode"); initQueryIdFormats(); @@ -2484,9 +2507,9 @@ void ClientBase::runInteractive() { /// Load suggestion data from the server. if (global_context->getApplicationType() == Context::ApplicationType::CLIENT) - suggest->load(global_context, connection_parameters, config().getInt("suggestion_limit"), wait_for_suggestions_to_load); + suggest->load(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load); else if (global_context->getApplicationType() == Context::ApplicationType::LOCAL) - suggest->load(global_context, connection_parameters, config().getInt("suggestion_limit"), wait_for_suggestions_to_load); + suggest->load(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load); } if (home_path.empty()) @@ -2497,8 +2520,8 @@ void ClientBase::runInteractive() } /// Load command history if present. 
- if (config().has("history_file")) - history_file = config().getString("history_file"); + if (getClientConfiguration().has("history_file")) + history_file = getClientConfiguration().getString("history_file"); else { auto * history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE"); // NOLINT(concurrency-mt-unsafe) @@ -2519,7 +2542,7 @@ void ClientBase::runInteractive() { if (e.getErrno() != EEXIST) { - std::cerr << getCurrentExceptionMessage(false) << '\n'; + error_stream << getCurrentExceptionMessage(false) << '\n'; } } } @@ -2530,13 +2553,13 @@ void ClientBase::runInteractive() #if USE_REPLXX replxx::Replxx::highlighter_callback_t highlight_callback{}; - if (config().getBool("highlight", true)) + if (getClientConfiguration().getBool("highlight", true)) highlight_callback = highlight; ReplxxLineReader lr( *suggest, history_file, - config().has("multiline"), + getClientConfiguration().has("multiline"), query_extenders, query_delimiters, word_break_characters, @@ -2544,7 +2567,7 @@ void ClientBase::runInteractive() #else LineReader lr( history_file, - config().has("multiline"), + getClientConfiguration().has("multiline"), query_extenders, query_delimiters, word_break_characters); @@ -2624,7 +2647,7 @@ void ClientBase::runInteractive() { // If a separate connection loading suggestions failed to open a new session, // use the main session to receive them. - suggest->load(*connection, connection_parameters.timeouts, config().getInt("suggestion_limit"), global_context->getClientInfo()); + suggest->load(*connection, connection_parameters.timeouts, getClientConfiguration().getInt("suggestion_limit"), global_context->getClientInfo()); } try @@ -2635,8 +2658,11 @@ void ClientBase::runInteractive() } catch (const Exception & e) { + if (e.code() == ErrorCodes::USER_EXPIRED) + break; + /// We don't need to handle the test hints in the interactive mode. 
- std::cerr << "Exception on client:" << std::endl << getExceptionMessage(e, print_stack_trace, true) << std::endl << std::endl; + error_stream << "Exception on client:" << std::endl << getExceptionMessage(e, print_stack_trace, true) << std::endl << std::endl; client_exception.reset(e.clone()); } @@ -2653,11 +2679,11 @@ void ClientBase::runInteractive() while (true); if (isNewYearMode()) - std::cout << "Happy new year." << std::endl; + output_stream << "Happy new year." << std::endl; else if (isChineseNewYearMode(local_tz)) - std::cout << "Happy Chinese new year. 春节快乐!" << std::endl; + output_stream << "Happy Chinese new year. 春节快乐!" << std::endl; else - std::cout << "Bye." << std::endl; + output_stream << "Bye." << std::endl; } @@ -2668,7 +2694,7 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name) ReadBufferFromFile in(file_name); readStringUntilEOF(queries_from_file, in); - if (!has_log_comment) + if (!getClientConfiguration().has("log_comment")) { Settings settings = global_context->getSettings(); /// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]" @@ -2777,13 +2803,13 @@ void ClientBase::clearTerminal() /// It is needed if garbage is left in terminal. /// Show cursor. It can be left hidden by invocation of previous programs. /// A test for this feature: perl -e 'print "x"x100000'; echo -ne '\033[0;0H\033[?25l'; clickhouse-client - std::cout << "\033[0J" "\033[?25h"; + output_stream << "\033[0J" "\033[?25h"; } void ClientBase::showClientVersion() { - std::cout << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; + output_stream << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." 
<< std::endl; } namespace @@ -2850,7 +2876,10 @@ private: } - +/// Enable optimizations even in debug builds because otherwise options parsing becomes extremely slow affecting .sh tests +#if defined(__clang__) +#pragma clang optimize on +#endif void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) { if (allow_repeated_settings) @@ -3068,18 +3097,18 @@ void ClientBase::init(int argc, char ** argv) if (options.count("version-clean")) { - std::cout << VERSION_STRING; + output_stream << VERSION_STRING; exit(0); // NOLINT(concurrency-mt-unsafe) } if (options.count("verbose")) - config().setBool("verbose", true); + getClientConfiguration().setBool("verbose", true); /// Output of help message. if (options.count("help") || (options.count("host") && options["host"].as() == "elp")) /// If user writes -help instead of --help. { - if (config().getBool("verbose", false)) + if (getClientConfiguration().getBool("verbose", false)) printHelpMessage(options_description, true); else printHelpMessage(options_description_non_verbose, false); @@ -3087,72 +3116,75 @@ void ClientBase::init(int argc, char ** argv) } /// Common options for clickhouse-client and clickhouse-local. + + /// Output execution time to stderr in batch mode. 
if (options.count("time")) - print_time_to_stderr = true; + getClientConfiguration().setBool("print-time-to-stderr", true); if (options.count("query")) queries = options["query"].as>(); if (options.count("query_id")) - config().setString("query_id", options["query_id"].as()); + getClientConfiguration().setString("query_id", options["query_id"].as()); if (options.count("database")) - config().setString("database", options["database"].as()); + getClientConfiguration().setString("database", options["database"].as()); if (options.count("config-file")) - config().setString("config-file", options["config-file"].as()); + getClientConfiguration().setString("config-file", options["config-file"].as()); if (options.count("queries-file")) queries_files = options["queries-file"].as>(); if (options.count("multiline")) - config().setBool("multiline", true); + getClientConfiguration().setBool("multiline", true); if (options.count("multiquery")) - config().setBool("multiquery", true); + getClientConfiguration().setBool("multiquery", true); if (options.count("ignore-error")) - config().setBool("ignore-error", true); + getClientConfiguration().setBool("ignore-error", true); if (options.count("format")) - config().setString("format", options["format"].as()); + getClientConfiguration().setString("format", options["format"].as()); if (options.count("output-format")) - config().setString("output-format", options["output-format"].as()); + getClientConfiguration().setString("output-format", options["output-format"].as()); if (options.count("vertical")) - config().setBool("vertical", true); + getClientConfiguration().setBool("vertical", true); if (options.count("stacktrace")) - config().setBool("stacktrace", true); + getClientConfiguration().setBool("stacktrace", true); if (options.count("print-profile-events")) - config().setBool("print-profile-events", true); + getClientConfiguration().setBool("print-profile-events", true); if (options.count("profile-events-delay-ms")) - 
config().setUInt64("profile-events-delay-ms", options["profile-events-delay-ms"].as()); + getClientConfiguration().setUInt64("profile-events-delay-ms", options["profile-events-delay-ms"].as()); + /// Whether to print the number of processed rows at if (options.count("processed-rows")) - print_num_processed_rows = true; + getClientConfiguration().setBool("print-num-processed-rows", true); if (options.count("progress")) { switch (options["progress"].as()) { case DEFAULT: - config().setString("progress", "default"); + getClientConfiguration().setString("progress", "default"); break; case OFF: - config().setString("progress", "off"); + getClientConfiguration().setString("progress", "off"); break; case TTY: - config().setString("progress", "tty"); + getClientConfiguration().setString("progress", "tty"); break; case ERR: - config().setString("progress", "err"); + getClientConfiguration().setString("progress", "err"); break; } } if (options.count("echo")) - config().setBool("echo", true); + getClientConfiguration().setBool("echo", true); if (options.count("disable_suggestion")) - config().setBool("disable_suggestion", true); + getClientConfiguration().setBool("disable_suggestion", true); if (options.count("wait_for_suggestions_to_load")) - config().setBool("wait_for_suggestions_to_load", true); + getClientConfiguration().setBool("wait_for_suggestions_to_load", true); if (options.count("suggestion_limit")) - config().setInt("suggestion_limit", options["suggestion_limit"].as()); + getClientConfiguration().setInt("suggestion_limit", options["suggestion_limit"].as()); if (options.count("highlight")) - config().setBool("highlight", options["highlight"].as()); + getClientConfiguration().setBool("highlight", options["highlight"].as()); if (options.count("history_file")) - config().setString("history_file", options["history_file"].as()); + getClientConfiguration().setString("history_file", options["history_file"].as()); if (options.count("interactive")) - 
config().setBool("interactive", true); + getClientConfiguration().setBool("interactive", true); if (options.count("pager")) - config().setString("pager", options["pager"].as()); + getClientConfiguration().setString("pager", options["pager"].as()); if (options.count("log-level")) Poco::Logger::root().setLevel(options["log-level"].as()); @@ -3170,13 +3202,13 @@ void ClientBase::init(int argc, char ** argv) alias_names.reserve(options_description.main_description->options().size()); for (const auto& option : options_description.main_description->options()) alias_names.insert(option->long_name()); - argsToConfig(common_arguments, config(), 100, &alias_names); + argsToConfig(common_arguments, getClientConfiguration(), 100, &alias_names); } clearPasswordFromCommandLine(argc, argv); /// Limit on total memory usage - std::string max_client_memory_usage = config().getString("max_memory_usage_in_client", "0" /*default value*/); + std::string max_client_memory_usage = getClientConfiguration().getString("max_memory_usage_in_client", "0" /*default value*/); if (max_client_memory_usage != "0") { UInt64 max_client_memory_usage_int = parseWithSizeSuffix(max_client_memory_usage.c_str(), max_client_memory_usage.length()); @@ -3185,8 +3217,6 @@ void ClientBase::init(int argc, char ** argv) total_memory_tracker.setDescription("(total)"); total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); } - - has_log_comment = config().has("log_comment"); } } diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 7a0489641c8..30dc4168996 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -18,7 +18,6 @@ #include #include - namespace po = boost::program_options; @@ -67,13 +66,22 @@ class ClientBase : public Poco::Util::Application, public IHints<2> public: using Arguments = std::vector; - ClientBase(); + explicit ClientBase + ( + int in_fd_ = STDIN_FILENO, + int out_fd_ = STDOUT_FILENO, + int err_fd_ = STDERR_FILENO, + std::istream & input_stream_ = 
std::cin, + std::ostream & output_stream_ = std::cout, + std::ostream & error_stream_ = std::cerr + ); + ~ClientBase() override; void init(int argc, char ** argv); std::vector getAllRegisteredNames() const override { return cmd_options; } - static ASTPtr parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements, bool is_interactive, bool ignore_error); + ASTPtr parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements); protected: void runInteractive(); @@ -82,6 +90,9 @@ protected: char * argv0 = nullptr; void runLibFuzzer(); + /// This is the analogue of Poco::Application::config() + virtual Poco::Util::LayeredConfiguration & getClientConfiguration() = 0; + virtual bool processWithFuzzing(const String &) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Query processing with fuzzing is not implemented"); @@ -107,7 +118,7 @@ protected: String & query_to_execute, ASTPtr & parsed_query, const String & all_queries_text, std::unique_ptr & current_exception); - static void clearTerminal(); + void clearTerminal(); void showClientVersion(); using ProgramOptionsDescription = boost::program_options::options_description; @@ -129,6 +140,7 @@ protected: const std::vector & hosts_and_ports_arguments) = 0; virtual void processConfig() = 0; + /// Returns true if query processing was successful. bool processQueryText(const String & text); virtual void readArguments( @@ -205,7 +217,6 @@ protected: bool echo_queries = false; /// Print queries before execution in batch mode. bool ignore_error = false; /// In case of errors, don't print error message, continue to next query. Only applicable for non-interactive mode. - bool print_time_to_stderr = false; /// Output execution time to stderr in batch mode. std::optional suggest; bool load_suggestions = false; @@ -250,9 +261,9 @@ protected: ConnectionParameters connection_parameters; /// Buffer that reads from stdin in batch mode. 
- ReadBufferFromFileDescriptor std_in{STDIN_FILENO}; + ReadBufferFromFileDescriptor std_in; /// Console output. - WriteBufferFromFileDescriptor std_out{STDOUT_FILENO}; + WriteBufferFromFileDescriptor std_out; std::unique_ptr pager_cmd; /// The user can specify to redirect query output to a file. @@ -283,7 +294,6 @@ protected: bool need_render_profile_events = true; bool written_first_block = false; size_t processed_rows = 0; /// How many rows have been read or written. - bool print_num_processed_rows = false; /// Whether to print the number of processed rows at bool print_stack_trace = false; /// The last exception that was received from the server. Is used for the @@ -328,10 +338,17 @@ protected: bool allow_repeated_settings = false; bool allow_merge_tree_settings = false; - bool cancelled = false; + std::atomic_bool cancelled = false; + std::atomic_bool cancelled_printed = false; + + /// Unpacked descriptors and streams for the ease of use. + int in_fd = STDIN_FILENO; + int out_fd = STDOUT_FILENO; + int err_fd = STDERR_FILENO; + std::istream & input_stream; + std::ostream & output_stream; + std::ostream & error_stream; - /// Does log_comment has specified by user? - bool has_log_comment = false; }; } diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 19cd8cc4ee5..799c7511982 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -37,6 +38,7 @@ #include #include +#include #include "config.h" #if USE_SSL @@ -68,12 +70,23 @@ namespace ErrorCodes extern const int EMPTY_DATA_PASSED; } -Connection::~Connection() = default; +Connection::~Connection() +{ + try{ + if (connected) + Connection::disconnect(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} Connection::Connection(const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, [[maybe_unused]] const SSHKey & ssh_private_key_, + const String & jwt_, const String & quota_key_, const String & cluster_, const String & cluster_secret_, @@ -86,6 +99,7 @@ Connection::Connection(const String & host_, UInt16 port_, , ssh_private_key(ssh_private_key_) #endif , quota_key(quota_key_) + , jwt(jwt_) , cluster(cluster_) , cluster_secret(cluster_secret_) , client_name(client_name_) @@ -257,13 +271,31 @@ void Connection::connect(const ConnectionTimeouts & timeouts) void Connection::disconnect() { - maybe_compressed_out = nullptr; in = nullptr; last_input_packet_type.reset(); std::exception_ptr finalize_exception; + + try + { + // finalize() can write and throw an exception. + if (maybe_compressed_out) + maybe_compressed_out->finalize(); + } + catch (...) + { + /// Don't throw an exception here, it will leave Connection in invalid state. + finalize_exception = std::current_exception(); + + if (out) + { + out->cancel(); + out = nullptr; + } + } + maybe_compressed_out = nullptr; + try { - // finalize() can write to socket and throw an exception. 
if (out) out->finalize(); } @@ -276,6 +308,7 @@ void Connection::disconnect() if (socket) socket->close(); + socket = nullptr; connected = false; nonce.reset(); @@ -341,6 +374,11 @@ void Connection::sendHello() performHandshakeForSSHAuth(); } #endif + else if (!jwt.empty()) + { + writeStringBinary(EncodedUserInfo::JWT_AUTHENTICAION_MARKER, *out); + writeStringBinary(jwt, *out); + } else { writeStringBinary(user, *out); @@ -767,6 +805,8 @@ void Connection::sendQuery( } maybe_compressed_in.reset(); + if (maybe_compressed_out && maybe_compressed_out != out) + maybe_compressed_out->cancel(); maybe_compressed_out.reset(); block_in.reset(); block_logs_in.reset(); @@ -1310,6 +1350,7 @@ ServerConnectionPtr Connection::createConnection(const ConnectionParameters & pa parameters.user, parameters.password, parameters.ssh_private_key, + parameters.jwt, parameters.quota_key, "", /* cluster */ "", /* cluster_secret */ diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 9632eb9d948..0f4b3e436df 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -53,6 +53,7 @@ public: const String & default_database_, const String & user_, const String & password_, const SSHKey & ssh_private_key_, + const String & jwt_, const String & quota_key_, const String & cluster_, const String & cluster_secret_, @@ -173,6 +174,7 @@ private: SSHKey ssh_private_key; #endif String quota_key; + String jwt; /// For inter-server authorization String cluster; diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 774f3375f63..303bebc30d2 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -52,31 +52,11 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati /// changed the default value to "default" to fix the issue when the user in the prompt is blank user = config.getString("user", "default"); - if (!config.has("ssh-key-file")) + if (config.has("jwt")) { - bool 
password_prompt = false; - if (config.getBool("ask-password", false)) - { - if (config.has("password")) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified both --password and --ask-password. Remove one of them"); - password_prompt = true; - } - else - { - password = config.getString("password", ""); - /// if the value of --password is omitted, the password will be set implicitly to "\n" - if (password == ASK_PASSWORD) - password_prompt = true; - } - if (password_prompt) - { - std::string prompt{"Password for user (" + user + "): "}; - char buf[1000] = {}; - if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0)) - password = result; - } + jwt = config.getString("jwt"); } - else + else if (config.has("ssh-key-file")) { #if USE_SSH std::string filename = config.getString("ssh-key-file"); @@ -102,6 +82,30 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif } + else + { + bool password_prompt = false; + if (config.getBool("ask-password", false)) + { + if (config.has("password")) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified both --password and --ask-password. 
Remove one of them"); + password_prompt = true; + } + else + { + password = config.getString("password", ""); + /// if the value of --password is omitted, the password will be set implicitly to "\n" + if (password == ASK_PASSWORD) + password_prompt = true; + } + if (password_prompt) + { + std::string prompt{"Password for user (" + user + "): "}; + char buf[1000] = {}; + if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0)) + password = result; + } + } quota_key = config.getString("quota_key", ""); @@ -139,7 +143,7 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati } UInt16 ConnectionParameters::getPortFromConfig(const Poco::Util::AbstractConfiguration & config, - std::string connection_host) + const std::string & connection_host) { bool is_secure = enableSecureConnection(config, connection_host); return config.getInt("port", diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index f23522d48b3..c305c7813f2 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -22,6 +22,7 @@ struct ConnectionParameters std::string password; std::string quota_key; SSHKey ssh_private_key; + std::string jwt; Protocol::Secure security = Protocol::Secure::Disable; Protocol::Compression compression = Protocol::Compression::Enable; ConnectionTimeouts timeouts; @@ -30,7 +31,7 @@ struct ConnectionParameters ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host); ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host, std::optional port); - static UInt16 getPortFromConfig(const Poco::Util::AbstractConfiguration & config, std::string connection_host); + static UInt16 getPortFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & connection_host); /// Ask to enter the user's password if password option contains this value. 
/// "\n" is used because there is hardly a chance that a user would use '\n' as password. diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index d35c2552461..725a5e91ac0 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -123,7 +123,7 @@ protected: { return std::make_shared( host, port, - default_database, user, password, SSHKey(), quota_key, + default_database, user, password, SSHKey(), /*jwt*/ "", quota_key, cluster, cluster_secret, client_name, compression, secure); } diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index fb4d9a6bdcc..8c993f906e0 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -255,6 +255,17 @@ void HedgedConnections::sendCancel() if (!sent_query || cancelled) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot cancel. Either no query sent or already cancelled."); + /// All hedged connections should be stopped, since otherwise before the + /// HedgedConnectionsFactory will be destroyed (that will happen from + /// QueryPipeline dtor) they could still do some work. + /// And not only this does not make sense, but it also could lead to + /// use-after-free of the current_thread, since the thread from which they + /// had been created differs from the thread where the dtor of + /// QueryPipeline will be called and the initial thread could be already + /// destroyed (especially when the system is under pressure). 
+ if (hedged_connections_factory.hasEventsInProcess()) + hedged_connections_factory.stopChoosingReplicas(); + cancelled = true; for (auto & offset_status : offset_states) diff --git a/src/Client/LineReader.cpp b/src/Client/LineReader.cpp index b3559657ced..487ef232fdc 100644 --- a/src/Client/LineReader.cpp +++ b/src/Client/LineReader.cpp @@ -23,14 +23,6 @@ void trim(String & s) s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end()); } -/// Check if multi-line query is inserted from the paste buffer. -/// Allows delaying the start of query execution until the entirety of query is inserted. -bool hasInputData() -{ - pollfd fd{STDIN_FILENO, POLLIN, 0}; - return poll(&fd, 1, 0) == 1; -} - struct NoCaseCompare { bool operator()(const std::string & str1, const std::string & str2) @@ -63,6 +55,14 @@ void addNewWords(Words & to, const Words & from, Compare comp) namespace DB { +/// Check if multi-line query is inserted from the paste buffer. +/// Allows delaying the start of query execution until the entirety of query is inserted. 
+bool LineReader::hasInputData() const +{ + pollfd fd{in_fd, POLLIN, 0}; + return poll(&fd, 1, 0) == 1; +} + replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length, const char * word_break_characters) { std::string_view last_word; @@ -131,11 +131,22 @@ void LineReader::Suggest::addWords(Words && new_words) // NOLINT(cppcoreguidelin } } -LineReader::LineReader(const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_) +LineReader::LineReader( + const String & history_file_path_, + bool multiline_, + Patterns extenders_, + Patterns delimiters_, + std::istream & input_stream_, + std::ostream & output_stream_, + int in_fd_ +) : history_file_path(history_file_path_) , multiline(multiline_) , extenders(std::move(extenders_)) , delimiters(std::move(delimiters_)) + , input_stream(input_stream_) + , output_stream(output_stream_) + , in_fd(in_fd_) { /// FIXME: check extender != delimiter } @@ -212,9 +223,9 @@ LineReader::InputStatus LineReader::readOneLine(const String & prompt) input.clear(); { - std::cout << prompt; - std::getline(std::cin, input); - if (!std::cin.good()) + output_stream << prompt; + std::getline(input_stream, input); + if (!input_stream.good()) return ABORT; } diff --git a/src/Client/LineReader.h b/src/Client/LineReader.h index fc19eaa5667..0172bd7ec22 100644 --- a/src/Client/LineReader.h +++ b/src/Client/LineReader.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include @@ -37,7 +39,16 @@ public: using Patterns = std::vector; - LineReader(const String & history_file_path, bool multiline, Patterns extenders, Patterns delimiters); + LineReader( + const String & history_file_path, + bool multiline, + Patterns extenders, + Patterns delimiters, + std::istream & input_stream_ = std::cin, + std::ostream & output_stream_ = std::cout, + int in_fd_ = STDIN_FILENO + ); + virtual ~LineReader() = default; /// Reads the whole line until delimiter (in 
multiline mode) or until the last line without extender. @@ -56,6 +67,8 @@ public: virtual void enableBracketedPaste() {} virtual void disableBracketedPaste() {} + bool hasInputData() const; + protected: enum InputStatus { @@ -77,6 +90,10 @@ protected: virtual InputStatus readOneLine(const String & prompt); virtual void addToHistory(const String &) {} + + std::istream & input_stream; + std::ostream & output_stream; + int in_fd; }; } diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index c7494e31605..072184e0a66 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -16,7 +16,10 @@ #include #include #include - +#include +#include +#include +#include namespace DB { @@ -151,12 +154,26 @@ void LocalConnection::sendQuery( state->block = sample; String current_format = "Values"; + + const auto & settings = context->getSettingsRef(); const char * begin = state->query.data(); - auto parsed_query = ClientBase::parseQuery(begin, begin + state->query.size(), - context->getSettingsRef(), - /*allow_multi_statements=*/ false, - /*is_interactive=*/ false, - /*ignore_error=*/ false); + const char * end = begin + state->query.size(); + const Dialect & dialect = settings.dialect; + + std::unique_ptr parser; + if (dialect == Dialect::kusto) + parser = std::make_unique(end, settings.allow_settings_after_format_in_insert); + else if (dialect == Dialect::prql) + parser = std::make_unique(settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); + else + parser = std::make_unique(end, settings.allow_settings_after_format_in_insert); + + ASTPtr parsed_query; + if (dialect == Dialect::kusto) + parsed_query = parseKQLQueryAndMovePosition(*parser, begin, end, "", /*allow_multi_statements*/false, settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); + else + parsed_query = parseQueryAndMovePosition(*parser, begin, end, "", /*allow_multi_statements*/false, settings.max_query_size, 
settings.max_parser_depth, settings.max_parser_backtracks); + if (const auto * insert = parsed_query->as()) { if (!insert->format.empty()) @@ -341,22 +358,18 @@ bool LocalConnection::poll(size_t) if (!state->is_finished) { - if (send_progress && (state->after_send_progress.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) - { - state->after_send_progress.restart(); - next_packet_type = Protocol::Server::Progress; + if (needSendProgressOrMetrics()) return true; - } - - if (send_profile_events && (state->after_send_profile_events.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) - { - sendProfileEvents(); - return true; - } try { - pollImpl(); + while (pollImpl()) + { + LOG_DEBUG(&Poco::Logger::get("LocalConnection"), "Executor timeout encountered, will retry"); + + if (needSendProgressOrMetrics()) + return true; + } } catch (const Exception & e) { @@ -451,12 +464,34 @@ bool LocalConnection::poll(size_t) return false; } +bool LocalConnection::needSendProgressOrMetrics() +{ + if (send_progress && (state->after_send_progress.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) + { + state->after_send_progress.restart(); + next_packet_type = Protocol::Server::Progress; + return true; + } + + if (send_profile_events && (state->after_send_profile_events.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) + { + sendProfileEvents(); + return true; + } + + return false; +} + bool LocalConnection::pollImpl() { Block block; auto next_read = pullBlock(block); - if (block && !state->io.null_format) + if (!block && next_read) + { + return true; + } + else if (block && !state->io.null_format) { state->block.emplace(block); } @@ -465,7 +500,7 @@ bool LocalConnection::pollImpl() state->is_finished = true; } - return true; + return false; } Packet LocalConnection::receivePacket() diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 899d134cce5..fb6fa1b55eb 
100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -151,8 +151,11 @@ private: void sendProfileEvents(); + /// Returns true on executor timeout, meaning a retryable error. bool pollImpl(); + bool needSendProgressOrMetrics(); + ContextMutablePtr query_context; Session session; diff --git a/src/Client/ReplxxLineReader.cpp b/src/Client/ReplxxLineReader.cpp index 9e0f5946205..46600168695 100644 --- a/src/Client/ReplxxLineReader.cpp +++ b/src/Client/ReplxxLineReader.cpp @@ -297,8 +297,15 @@ ReplxxLineReader::ReplxxLineReader( Patterns extenders_, Patterns delimiters_, const char word_break_characters_[], - replxx::Replxx::highlighter_callback_t highlighter_) - : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_)) + replxx::Replxx::highlighter_callback_t highlighter_, + [[ maybe_unused ]] std::istream & input_stream_, + [[ maybe_unused ]] std::ostream & output_stream_, + [[ maybe_unused ]] int in_fd_, + [[ maybe_unused ]] int out_fd_, + [[ maybe_unused ]] int err_fd_ +) + : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_), input_stream_, output_stream_, in_fd_) + , highlighter(std::move(highlighter_)) , word_break_characters(word_break_characters_) , editor(getEditor()) { @@ -471,7 +478,7 @@ ReplxxLineReader::ReplxxLineReader( ReplxxLineReader::~ReplxxLineReader() { - if (close(history_file_fd)) + if (history_file_fd >= 0 && close(history_file_fd)) rx.print("Close of history file failed: %s\n", errnoToString().c_str()); } @@ -496,7 +503,7 @@ void ReplxxLineReader::addToHistory(const String & line) // but replxx::Replxx::history_load() does not // and that is why flock() is added here. 
bool locked = false; - if (flock(history_file_fd, LOCK_EX)) + if (history_file_fd >= 0 && flock(history_file_fd, LOCK_EX)) rx.print("Lock of history file failed: %s\n", errnoToString().c_str()); else locked = true; @@ -507,7 +514,7 @@ void ReplxxLineReader::addToHistory(const String & line) if (!rx.history_save(history_file_path)) rx.print("Saving history failed: %s\n", errnoToString().c_str()); - if (locked && 0 != flock(history_file_fd, LOCK_UN)) + if (history_file_fd >= 0 && locked && 0 != flock(history_file_fd, LOCK_UN)) rx.print("Unlock of history file failed: %s\n", errnoToString().c_str()); } diff --git a/src/Client/ReplxxLineReader.h b/src/Client/ReplxxLineReader.h index 6ad149e38f2..c46080420ef 100644 --- a/src/Client/ReplxxLineReader.h +++ b/src/Client/ReplxxLineReader.h @@ -1,6 +1,7 @@ #pragma once -#include "LineReader.h" +#include +#include #include namespace DB @@ -9,14 +10,22 @@ namespace DB class ReplxxLineReader : public LineReader { public: - ReplxxLineReader( + ReplxxLineReader + ( Suggest & suggest, const String & history_file_path, bool multiline, Patterns extenders_, Patterns delimiters_, const char word_break_characters_[], - replxx::Replxx::highlighter_callback_t highlighter_); + replxx::Replxx::highlighter_callback_t highlighter_, + std::istream & input_stream_ = std::cin, + std::ostream & output_stream_ = std::cout, + int in_fd_ = STDIN_FILENO, + int out_fd_ = STDOUT_FILENO, + int err_fd_ = STDERR_FILENO + ); + ~ReplxxLineReader() override; void enableBracketedPaste() override; diff --git a/src/Client/TestHint.h b/src/Client/TestHint.h index eaf854be5df..b76c4245df4 100644 --- a/src/Client/TestHint.h +++ b/src/Client/TestHint.h @@ -112,7 +112,7 @@ struct fmt::formatter } template - auto format(const DB::TestHint::ErrorVector & ErrorVector, FormatContext & ctx) + auto format(const DB::TestHint::ErrorVector & ErrorVector, FormatContext & ctx) const { if (ErrorVector.empty()) return fmt::format_to(ctx.out(), "{}", 0); diff --git 
a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 5578a8dde60..510a4cacf1e 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -51,7 +51,7 @@ public: std::string getName() const override { return "Nullable(" + nested_column->getName() + ")"; } TypeIndex getDataType() const override { return TypeIndex::Nullable; } MutableColumnPtr cloneResized(size_t size) const override; - size_t size() const override { return nested_column->size(); } + size_t size() const override { return assert_cast(*null_map).size(); } bool isNullAt(size_t n) const override { return assert_cast(*null_map).getData()[n] != 0;} Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; diff --git a/src/Columns/FilterDescription.h b/src/Columns/FilterDescription.h index 63457b8b544..b4335a49787 100644 --- a/src/Columns/FilterDescription.h +++ b/src/Columns/FilterDescription.h @@ -23,15 +23,10 @@ struct ConstantFilterDescription struct IFilterDescription { - /// has_one can be pre-compute during creating the filter description in some cases - Int64 has_one = -1; virtual ColumnPtr filter(const IColumn & column, ssize_t result_size_hint) const = 0; virtual size_t countBytesInFilter() const = 0; virtual ~IFilterDescription() = default; - bool hasOne() { return has_one >= 0 ? has_one : hasOneImpl();} protected: - /// Calculate if filter has a non-zero from the filter values, may update has_one - virtual bool hasOneImpl() = 0; }; /// Obtain a filter from non constant Column, that may have type: UInt8, Nullable(UInt8). @@ -45,7 +40,6 @@ struct FilterDescription final : public IFilterDescription ColumnPtr filter(const IColumn & column, ssize_t result_size_hint) const override { return column.filter(*data, result_size_hint); } size_t countBytesInFilter() const override { return DB::countBytesInFilter(*data); } protected: - bool hasOneImpl() override { return data ? 
(has_one = !memoryIsZero(data->data(), 0, data->size())) : false; } }; struct SparseFilterDescription final : public IFilterDescription @@ -56,7 +50,6 @@ struct SparseFilterDescription final : public IFilterDescription ColumnPtr filter(const IColumn & column, ssize_t) const override { return column.index(*filter_indices, 0); } size_t countBytesInFilter() const override { return filter_indices->size(); } protected: - bool hasOneImpl() override { return filter_indices && !filter_indices->empty(); } }; struct ColumnWithTypeAndName; diff --git a/src/Columns/MaskOperations.cpp b/src/Columns/MaskOperations.cpp index 1f5f94beee9..873a4060872 100644 --- a/src/Columns/MaskOperations.cpp +++ b/src/Columns/MaskOperations.cpp @@ -289,10 +289,14 @@ void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty) if (!column_function) return; + size_t original_size = column.column->size(); + if (!empty) column = column_function->reduce(); else - column.column = column_function->getResultType()->createColumn(); + column.column = column_function->getResultType()->createColumnConstWithDefaultValue(original_size)->convertToFullColumnIfConst(); + + chassert(column.column->size() == original_size); } int checkShortCircuitArguments(const ColumnsWithTypeAndName & arguments) diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp index e80c125c2a0..bfc85559fe8 100644 --- a/src/Common/Allocator.cpp +++ b/src/Common/Allocator.cpp @@ -1,8 +1,9 @@ #include -#include -#include -#include #include +#include +#include +#include +#include #include #include @@ -10,6 +11,12 @@ #include #include /// MADV_POPULATE_WRITE +namespace ProfileEvents +{ + extern const Event GWPAsanAllocateSuccess; + extern const Event GWPAsanAllocateFailed; + extern const Event GWPAsanFree; +} namespace DB { @@ -60,6 +67,27 @@ template void * allocNoTrack(size_t size, size_t alignment) { void * buf; +#if USE_GWP_ASAN + if (unlikely(GWPAsan::GuardedAlloc.shouldSample())) + { + if (void * ptr = 
GWPAsan::GuardedAlloc.allocate(size, alignment)) + { + if constexpr (clear_memory) + memset(ptr, 0, size); + + if constexpr (populate) + prefaultPages(ptr, size); + + ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess); + + return ptr; + } + else + { + ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed); + } + } +#endif if (alignment <= MALLOC_MIN_ALIGNMENT) { if constexpr (clear_memory) @@ -91,6 +119,15 @@ void * allocNoTrack(size_t size, size_t alignment) void freeNoTrack(void * buf) { +#if USE_GWP_ASAN + if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(buf))) + { + ProfileEvents::increment(ProfileEvents::GWPAsanFree); + GWPAsan::GuardedAlloc.deallocate(buf); + return; + } +#endif + ::free(buf); } @@ -144,8 +181,54 @@ void * Allocator::realloc(void * buf, size_t old_size, { /// nothing to do. /// BTW, it's not possible to change alignment while doing realloc. + return buf; } - else if (alignment <= MALLOC_MIN_ALIGNMENT) + +#if USE_GWP_ASAN + if (unlikely(GWPAsan::GuardedAlloc.shouldSample())) + { + if (void * ptr = GWPAsan::GuardedAlloc.allocate(new_size, alignment)) + { + auto trace_free = CurrentMemoryTracker::free(old_size); + auto trace_alloc = CurrentMemoryTracker::alloc(new_size); + trace_free.onFree(buf, old_size); + + memcpy(ptr, buf, std::min(old_size, new_size)); + free(buf, old_size); + trace_alloc.onAlloc(buf, new_size); + + if constexpr (clear_memory) + if (new_size > old_size) + memset(reinterpret_cast(ptr) + old_size, 0, new_size - old_size); + + if constexpr (populate) + prefaultPages(ptr, new_size); + + ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess); + return ptr; + } + else + { + ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed); + } + } + + if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(buf))) + { + /// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods. 
+ void * new_buf = alloc(new_size, alignment); + memcpy(new_buf, buf, std::min(old_size, new_size)); + free(buf, old_size); + buf = new_buf; + + if constexpr (populate) + prefaultPages(buf, new_size); + + return buf; + } +#endif + + if (alignment <= MALLOC_MIN_ALIGNMENT) { /// Resize malloc'd memory region with no special alignment requirement. auto trace_free = CurrentMemoryTracker::free(old_size); diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 4c71b9846c7..6309f6079f6 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -415,6 +415,15 @@ Value saveAllArenasMetric(AsynchronousMetricValues & values, fmt::format("jemalloc.arenas.all.{}", metric_name)); } +template +Value saveJemallocProf(AsynchronousMetricValues & values, + const std::string & metric_name) +{ + return saveJemallocMetricImpl(values, + fmt::format("prof.{}", metric_name), + fmt::format("jemalloc.prof.{}", metric_name)); +} + } #endif @@ -607,6 +616,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) saveJemallocMetric(new_values, "background_thread.num_threads"); saveJemallocMetric(new_values, "background_thread.num_runs"); saveJemallocMetric(new_values, "background_thread.run_intervals"); + saveJemallocProf(new_values, "active"); saveAllArenasMetric(new_values, "pactive"); [[maybe_unused]] size_t je_malloc_pdirty = saveAllArenasMetric(new_values, "pdirty"); [[maybe_unused]] size_t je_malloc_pmuzzy = saveAllArenasMetric(new_values, "pmuzzy"); diff --git a/src/Common/AsynchronousMetrics.h b/src/Common/AsynchronousMetrics.h index b62529a08e7..10a972d2458 100644 --- a/src/Common/AsynchronousMetrics.h +++ b/src/Common/AsynchronousMetrics.h @@ -45,14 +45,17 @@ struct ProtocolServerMetrics }; /** Periodically (by default, each second) - * calculates and updates some metrics, - * that are not updated automatically (so, need to be asynchronously calculated). 
+ * calculates and updates some metrics, + * that are not updated automatically (so, need to be asynchronously calculated). * - * This includes both ClickHouse-related metrics (like memory usage of ClickHouse process) - * and common OS-related metrics (like total memory usage on the server). + * This includes both general process metrics (like memory usage) + * and common OS-related metrics (like total memory usage on the server). * * All the values are either gauge type (like the total number of tables, the current memory usage). * Or delta-counters representing some accumulation during the interval of time. + * + * Server and Keeper specific metrics are contained inside + * ServerAsynchronousMetrics and KeeperAsynchronousMetrics respectively. */ class AsynchronousMetrics { diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h index 922607da813..16b9488c98d 100644 --- a/src/Common/ConcurrentBoundedQueue.h +++ b/src/Common/ConcurrentBoundedQueue.h @@ -1,8 +1,6 @@ #pragma once #include -#include -#include #include #include #include @@ -200,22 +198,18 @@ public: */ bool finish() { - bool was_finished_before = false; - { std::lock_guard lock(queue_mutex); if (is_finished) return true; - was_finished_before = is_finished; is_finished = true; } pop_condition.notify_all(); push_condition.notify_all(); - - return was_finished_before; + return false; } /// Returns if queue is finished diff --git a/src/Common/Config/ConfigReloader.cpp b/src/Common/Config/ConfigReloader.cpp index b2c07dacf07..769a63c036b 100644 --- a/src/Common/Config/ConfigReloader.cpp +++ b/src/Common/Config/ConfigReloader.cpp @@ -19,8 +19,7 @@ ConfigReloader::ConfigReloader( const std::string & preprocessed_dir_, zkutil::ZooKeeperNodeCache && zk_node_cache_, const zkutil::EventPtr & zk_changed_event_, - Updater && updater_, - bool already_loaded) + Updater && updater_) : config_path(config_path_) , extra_paths(extra_paths_) , preprocessed_dir(preprocessed_dir_) @@ -28,10 
+27,15 @@ ConfigReloader::ConfigReloader( , zk_changed_event(zk_changed_event_) , updater(std::move(updater_)) { - if (!already_loaded) - reloadIfNewer(/* force = */ true, /* throw_on_error = */ true, /* fallback_to_preprocessed = */ true, /* initial_loading = */ true); -} + auto config = reloadIfNewer(/* force = */ true, /* throw_on_error = */ true, /* fallback_to_preprocessed = */ true, /* initial_loading = */ true); + if (config.has_value()) + reload_interval = std::chrono::milliseconds(config->configuration->getInt64("config_reload_interval_ms", DEFAULT_RELOAD_INTERVAL.count())); + else + reload_interval = DEFAULT_RELOAD_INTERVAL; + + LOG_TRACE(log, "Config reload interval set to {}ms", reload_interval.count()); +} void ConfigReloader::start() { @@ -82,7 +86,17 @@ void ConfigReloader::run() if (quit) return; - reloadIfNewer(zk_changed, /* throw_on_error = */ false, /* fallback_to_preprocessed = */ false, /* initial_loading = */ false); + auto config = reloadIfNewer(zk_changed, /* throw_on_error = */ false, /* fallback_to_preprocessed = */ false, /* initial_loading = */ false); + if (config.has_value()) + { + auto new_reload_interval = std::chrono::milliseconds(config->configuration->getInt64("config_reload_interval_ms", DEFAULT_RELOAD_INTERVAL.count())); + if (new_reload_interval != reload_interval) + { + reload_interval = new_reload_interval; + LOG_TRACE(log, "Config reload interval changed to {}ms", reload_interval.count()); + } + } + } catch (...) 
{ @@ -92,7 +106,7 @@ void ConfigReloader::run() } } -void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading) +std::optional ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading) { std::lock_guard lock(reload_mutex); @@ -120,7 +134,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac throw; tryLogCurrentException(log, "ZooKeeper error when loading config from '" + config_path + "'"); - return; + return std::nullopt; } catch (...) { @@ -128,7 +142,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac throw; tryLogCurrentException(log, "Error loading config from '" + config_path + "'"); - return; + return std::nullopt; } config_processor.savePreprocessedConfig(loaded_config, preprocessed_dir); @@ -154,11 +168,13 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac if (throw_on_error) throw; tryLogCurrentException(log, "Error updating configuration from '" + config_path + "' config."); - return; + return std::nullopt; } LOG_DEBUG(log, "Loaded config '{}', performed update on configuration", config_path); + return loaded_config; } + return std::nullopt; } struct ConfigReloader::FileWithTimestamp diff --git a/src/Common/Config/ConfigReloader.h b/src/Common/Config/ConfigReloader.h index 13a797bad08..89ef0fd8a0b 100644 --- a/src/Common/Config/ConfigReloader.h +++ b/src/Common/Config/ConfigReloader.h @@ -17,8 +17,6 @@ namespace Poco { class Logger; } namespace DB { -class Context; - /** Every two seconds checks configuration files for update. * If configuration is changed, then config will be reloaded by ConfigProcessor * and the reloaded config will be applied via Updater functor. 
@@ -27,6 +25,8 @@ class Context; class ConfigReloader { public: + static constexpr auto DEFAULT_RELOAD_INTERVAL = std::chrono::milliseconds(2000); + using Updater = std::function; ConfigReloader( @@ -35,8 +35,7 @@ public: const std::string & preprocessed_dir, zkutil::ZooKeeperNodeCache && zk_node_cache, const zkutil::EventPtr & zk_changed_event, - Updater && updater, - bool already_loaded); + Updater && updater); ~ConfigReloader(); @@ -53,7 +52,7 @@ public: private: void run(); - void reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading); + std::optional reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading); struct FileWithTimestamp; @@ -67,8 +66,6 @@ private: FilesChangesTracker getNewFileList() const; - static constexpr auto reload_interval = std::chrono::seconds(2); - LoggerPtr log = getLogger("ConfigReloader"); std::string config_path; @@ -85,6 +82,8 @@ private: std::atomic quit{false}; ThreadFromGlobalPool thread; + std::chrono::milliseconds reload_interval = DEFAULT_RELOAD_INTERVAL; + /// Locked inside reloadIfNewer. std::mutex reload_mutex; }; diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 731c72d65f2..8516a88c7af 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -1,6 +1,7 @@ #include +// clang-format off /// Available metrics. Add something here as you wish. /// If the metric is generic (i.e. 
not server specific) /// it should be also added to src/Coordination/KeeperConstant.cpp diff --git a/src/Common/Dwarf.cpp b/src/Common/Dwarf.cpp index 99da3b75429..8439c01b22c 100644 --- a/src/Common/Dwarf.cpp +++ b/src/Common/Dwarf.cpp @@ -202,7 +202,10 @@ uint64_t readU64(std::string_view & sp) { SAFE_CHECK(sp.size() >= N, "underflow"); uint64_t x = 0; - memcpy(&x, sp.data(), N); + if constexpr (std::endian::native == std::endian::little) + memcpy(&x, sp.data(), N); + else + memcpy(reinterpret_cast(&x) + sizeof(uint64_t) - N, sp.data(), N); sp.remove_prefix(N); return x; } diff --git a/src/Common/EnvironmentProxyConfigurationResolver.cpp b/src/Common/EnvironmentProxyConfigurationResolver.cpp index f2c60afa1a8..b7b1f1ecfde 100644 --- a/src/Common/EnvironmentProxyConfigurationResolver.cpp +++ b/src/Common/EnvironmentProxyConfigurationResolver.cpp @@ -1,6 +1,7 @@ #include "EnvironmentProxyConfigurationResolver.h" #include +#include #include namespace DB @@ -12,6 +13,7 @@ namespace DB * */ static constexpr auto PROXY_HTTP_ENVIRONMENT_VARIABLE = "http_proxy"; static constexpr auto PROXY_HTTPS_ENVIRONMENT_VARIABLE = "https_proxy"; +static constexpr auto NO_PROXY_ENVIRONMENT_VARIABLE = "no_proxy"; EnvironmentProxyConfigurationResolver::EnvironmentProxyConfigurationResolver( Protocol request_protocol_, bool disable_tunneling_for_https_requests_over_http_proxy_) @@ -34,31 +36,60 @@ namespace return std::getenv(PROXY_HTTPS_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe) } } + + const char * getNoProxyHosts() + { + return std::getenv(NO_PROXY_ENVIRONMENT_VARIABLE); // NOLINT(concurrency-mt-unsafe) + } + + ProxyConfiguration buildProxyConfiguration( + ProxyConfiguration::Protocol request_protocol, + const Poco::URI & uri, + const std::string & no_proxy_hosts_string, + bool disable_tunneling_for_https_requests_over_http_proxy) + { + if (uri.empty()) + { + return {}; + } + + const auto & host = uri.getHost(); + const auto & scheme = uri.getScheme(); + const auto port = 
uri.getPort(); + + const bool use_tunneling_for_https_requests_over_http_proxy = ProxyConfiguration::useTunneling( + request_protocol, + ProxyConfiguration::protocolFromString(scheme), + disable_tunneling_for_https_requests_over_http_proxy); + + LOG_TRACE(getLogger("EnvironmentProxyConfigurationResolver"), "Use proxy from environment: {}://{}:{}", scheme, host, port); + + return ProxyConfiguration { + host, + ProxyConfiguration::protocolFromString(scheme), + port, + use_tunneling_for_https_requests_over_http_proxy, + request_protocol, + no_proxy_hosts_string + }; + } } ProxyConfiguration EnvironmentProxyConfigurationResolver::resolve() { - const auto * proxy_host = getProxyHost(request_protocol); + static const auto * http_proxy_host = getProxyHost(Protocol::HTTP); + static const auto * https_proxy_host = getProxyHost(Protocol::HTTPS); + static const auto * no_proxy = getNoProxyHosts(); + static const auto poco_no_proxy_hosts = no_proxy ? buildPocoNonProxyHosts(no_proxy) : ""; - if (!proxy_host) - { - return {}; - } + static const Poco::URI http_proxy_uri(http_proxy_host ? http_proxy_host : ""); + static const Poco::URI https_proxy_uri(https_proxy_host ? https_proxy_host : ""); - auto uri = Poco::URI(proxy_host); - auto host = uri.getHost(); - auto scheme = uri.getScheme(); - auto port = uri.getPort(); - - LOG_TRACE(getLogger("EnvironmentProxyConfigurationResolver"), "Use proxy from environment: {}://{}:{}", scheme, host, port); - - return ProxyConfiguration { - host, - ProxyConfiguration::protocolFromString(scheme), - port, - useTunneling(request_protocol, ProxyConfiguration::protocolFromString(scheme), disable_tunneling_for_https_requests_over_http_proxy), - request_protocol - }; + return buildProxyConfiguration( + request_protocol, + request_protocol == Protocol::HTTP ? 
http_proxy_uri : https_proxy_uri, + poco_no_proxy_hosts, + disable_tunneling_for_https_requests_over_http_proxy); } } diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index ea6f9510927..b1b8e2367a4 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -586,7 +586,7 @@ M(705, TABLE_NOT_EMPTY) \ M(706, LIBSSH_ERROR) \ M(707, GCP_ERROR) \ - M(708, ILLEGAL_STATISTIC) \ + M(708, ILLEGAL_STATISTICS) \ M(709, CANNOT_GET_REPLICATED_DATABASE_SNAPSHOT) \ M(710, FAULT_INJECTED) \ M(711, FILECACHE_ACCESS_DENIED) \ @@ -602,6 +602,8 @@ M(721, DEPRECATED_FUNCTION) \ M(722, ASYNC_LOAD_WAIT_FAILED) \ M(723, PARQUET_EXCEPTION) \ + M(724, TOO_MANY_TABLES) \ + M(725, TOO_MANY_DATABASES) \ \ M(900, DISTRIBUTED_CACHE_ERROR) \ M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ diff --git a/src/Common/ErrorCodes.h b/src/Common/ErrorCodes.h index 8879779a5e2..11a163becbe 100644 --- a/src/Common/ErrorCodes.h +++ b/src/Common/ErrorCodes.h @@ -1,8 +1,6 @@ #pragma once #include -#include -#include #include #include #include @@ -35,7 +33,7 @@ namespace ErrorCodes struct Error { - /// Number of times Exception with this ErrorCode had been throw. + /// Number of times Exception with this ErrorCode has been thrown. Value count = 0; /// Time of the last error. 
UInt64 error_time_ms = 0; diff --git a/src/Common/GWPAsan.cpp b/src/Common/GWPAsan.cpp new file mode 100644 index 00000000000..488f8e2c5dc --- /dev/null +++ b/src/Common/GWPAsan.cpp @@ -0,0 +1,226 @@ +#include + +#if USE_GWP_ASAN +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# include +# include + +namespace GWPAsan +{ + +namespace +{ +size_t getBackTrace(uintptr_t * trace_buffer, size_t buffer_size) +{ + StackTrace stacktrace; + auto trace_size = std::min(buffer_size, stacktrace.getSize()); + const auto & frame_pointers = stacktrace.getFramePointers(); + memcpy(trace_buffer, frame_pointers.data(), trace_size * sizeof(uintptr_t)); + return trace_size; +} + +__attribute__((__format__ (__printf__, 1, 0))) +void printString(const char * format, ...) // NOLINT(cert-dcl50-cpp) +{ + std::array formatted; + va_list args; + va_start(args, format); + + if (vsnprintf(formatted.data(), formatted.size(), format, args) > 0) + std::cerr << formatted.data() << std::endl; + + va_end(args); +} + +} + +gwp_asan::GuardedPoolAllocator GuardedAlloc; + +static bool guarded_alloc_initialized = [] +{ + const char * env_options_raw = std::getenv("GWP_ASAN_OPTIONS"); // NOLINT(concurrency-mt-unsafe) + if (env_options_raw) + gwp_asan::options::initOptions(env_options_raw, printString); + + auto & opts = gwp_asan::options::getOptions(); + if (!env_options_raw || !std::string_view{env_options_raw}.contains("MaxSimultaneousAllocations")) + opts.MaxSimultaneousAllocations = 1024; + + if (!env_options_raw || !std::string_view{env_options_raw}.contains("SampleRate")) + opts.SampleRate = 50000; + + opts.Backtrace = getBackTrace; + GuardedAlloc.init(opts); + + return true; +}(); + +bool isGWPAsanError(uintptr_t fault_address) +{ + const auto * state = GuardedAlloc.getAllocatorState(); + if (state->FailureType != gwp_asan::Error::UNKNOWN && state->FailureAddress != 0) + return true; + + return fault_address < state->GuardedPagePoolEnd 
&& state->GuardedPagePool <= fault_address; +} + +namespace +{ + +struct ScopedEndOfReportDecorator +{ + explicit ScopedEndOfReportDecorator(Poco::LoggerPtr log_) : log(std::move(log_)) { } + ~ScopedEndOfReportDecorator() { LOG_FATAL(log, "*** End GWP-ASan report ***"); } + Poco::LoggerPtr log; +}; + +// Prints the provided error and metadata information. +void printHeader(gwp_asan::Error error, uintptr_t fault_address, const gwp_asan::AllocationMetadata * allocation_meta, Poco::LoggerPtr log) +{ + bool access_was_in_bounds = false; + std::string description; + if (error != gwp_asan::Error::UNKNOWN && allocation_meta != nullptr) + { + uintptr_t address = __gwp_asan_get_allocation_address(allocation_meta); + size_t size = __gwp_asan_get_allocation_size(allocation_meta); + if (fault_address < address) + { + description = fmt::format( + "({} byte{} to the left of a {}-byte allocation at 0x{}) ", + address - fault_address, + (address - fault_address == 1) ? "" : "s", + size, + address); + } + else if (fault_address > address) + { + description = fmt::format( + "({} byte{} to the right of a {}-byte allocation at 0x{}) ", + fault_address - address, + (fault_address - address == 1) ? "" : "s", + size, + address); + } + else if (error == gwp_asan::Error::DOUBLE_FREE) + { + description = fmt::format("(a {}-byte allocation) ", size); + } + else + { + access_was_in_bounds = true; + description = fmt::format( + "({} byte{} into a {}-byte allocation at 0x{}) ", + fault_address - address, + (fault_address - address == 1) ? "" : "s", + size, + address); + } + } + + uint64_t thread_id = gwp_asan::getThreadID(); + std::string thread_id_string = thread_id == gwp_asan::kInvalidThreadID ? 
" 512B in length.\n"; + + if (allocation_meta == nullptr) + { + LOG_FATAL(logger, "*** GWP-ASan detected a memory error ***"); + ScopedEndOfReportDecorator decorator(logger); + LOG_FATAL(logger, fmt::runtime(unknown_crash_text)); + return; + } + + LOG_FATAL(logger, "*** GWP-ASan detected a memory error ***"); + ScopedEndOfReportDecorator decorator(logger); + + gwp_asan::Error error = __gwp_asan_diagnose_error(state, allocation_meta, fault_address); + if (error == gwp_asan::Error::UNKNOWN) + { + LOG_FATAL(logger, fmt::runtime(unknown_crash_text)); + return; + } + + // Print the error header. + printHeader(error, fault_address, allocation_meta, logger); + + static constexpr size_t maximum_stack_frames = 512; + std::array trace; + + // Maybe print the deallocation trace. + if (__gwp_asan_is_deallocated(allocation_meta)) + { + uint64_t thread_id = __gwp_asan_get_deallocation_thread_id(allocation_meta); + if (thread_id == gwp_asan::kInvalidThreadID) + LOG_FATAL(logger, "0x{} was deallocated by thread here:", fault_address); + else + LOG_FATAL(logger, "0x{} was deallocated by thread {} here:", fault_address, thread_id); + const auto trace_length = __gwp_asan_get_deallocation_trace(allocation_meta, trace.data(), maximum_stack_frames); + StackTrace::toStringEveryLine( + reinterpret_cast(trace.data()), 0, trace_length, [&](const auto line) { LOG_FATAL(logger, fmt::runtime(line)); }); + } + + // Print the allocation trace. 
+ uint64_t thread_id = __gwp_asan_get_allocation_thread_id(allocation_meta); + if (thread_id == gwp_asan::kInvalidThreadID) + LOG_FATAL(logger, "0x{} was allocated by thread here:", fault_address); + else + LOG_FATAL(logger, "0x{} was allocated by thread {} here:", fault_address, thread_id); + const auto trace_length = __gwp_asan_get_allocation_trace(allocation_meta, trace.data(), maximum_stack_frames); + StackTrace::toStringEveryLine( + reinterpret_cast(trace.data()), 0, trace_length, [&](const auto line) { LOG_FATAL(logger, fmt::runtime(line)); }); +} + +std::atomic force_sample_probability = 0.0; + +void setForceSampleProbability(double value) +{ + force_sample_probability.store(value, std::memory_order_relaxed); +} + +} + +#endif diff --git a/src/Common/GWPAsan.h b/src/Common/GWPAsan.h new file mode 100644 index 00000000000..b3215c6157e --- /dev/null +++ b/src/Common/GWPAsan.h @@ -0,0 +1,34 @@ +#pragma once + +#include "config.h" + +#if USE_GWP_ASAN + +#include +#include + +#include +#include + +namespace GWPAsan +{ + +extern gwp_asan::GuardedPoolAllocator GuardedAlloc; + +bool isGWPAsanError(uintptr_t fault_address); + +void printReport(uintptr_t fault_address); + +extern std::atomic force_sample_probability; + +void setForceSampleProbability(double value); + +inline bool shouldForceSample() +{ + std::bernoulli_distribution dist(force_sample_probability.load(std::memory_order_relaxed)); + return dist(thread_local_rng); +} + +} + +#endif diff --git a/src/Common/GetPriorityForLoadBalancing.cpp b/src/Common/GetPriorityForLoadBalancing.cpp index d4c6f89ff92..dc5704ef6b5 100644 --- a/src/Common/GetPriorityForLoadBalancing.cpp +++ b/src/Common/GetPriorityForLoadBalancing.cpp @@ -60,4 +60,26 @@ GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t return get_priority; } +/// Some load balancing strategies (such as "nearest hostname") have preferred nodes to connect to. +/// Usually it's a node in the same data center/availability zone. 
+/// For other strategies there's no difference between nodes. +bool GetPriorityForLoadBalancing::hasOptimalNode() const +{ + switch (load_balancing) + { + case LoadBalancing::NEAREST_HOSTNAME: + return true; + case LoadBalancing::HOSTNAME_LEVENSHTEIN_DISTANCE: + return true; + case LoadBalancing::IN_ORDER: + return false; + case LoadBalancing::RANDOM: + return false; + case LoadBalancing::FIRST_OR_RANDOM: + return true; + case LoadBalancing::ROUND_ROBIN: + return false; + } +} + } diff --git a/src/Common/GetPriorityForLoadBalancing.h b/src/Common/GetPriorityForLoadBalancing.h index 0de99730977..01dae9a1289 100644 --- a/src/Common/GetPriorityForLoadBalancing.h +++ b/src/Common/GetPriorityForLoadBalancing.h @@ -30,6 +30,8 @@ public: Func getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const; + bool hasOptimalNode() const; + std::vector hostname_prefix_distance; /// Prefix distances from name of this host to the names of hosts of pools. std::vector hostname_levenshtein_distance; /// Levenshtein Distances from name of this host to the names of hosts of pools. 
diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index 167aeee68f3..f3ff09bc90a 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -70,20 +71,6 @@ namespace CurrentMetrics namespace { - Poco::Net::HTTPClientSession::ProxyConfig proxyConfigurationToPocoProxyConfig(const DB::ProxyConfiguration & proxy_configuration) - { - Poco::Net::HTTPClientSession::ProxyConfig poco_proxy_config; - - poco_proxy_config.host = proxy_configuration.host; - poco_proxy_config.port = proxy_configuration.port; - poco_proxy_config.protocol = DB::ProxyConfiguration::protocolToString(proxy_configuration.protocol); - poco_proxy_config.tunnel = proxy_configuration.tunneling; - poco_proxy_config.originalRequestProtocol = DB::ProxyConfiguration::protocolToString(proxy_configuration.original_request_protocol); - - return poco_proxy_config; - } - - constexpr size_t roundUp(size_t x, size_t rounding) { chassert(rounding > 0); @@ -696,7 +683,8 @@ struct EndpointPoolKey proxy_config.port, proxy_config.protocol, proxy_config.tunneling, - proxy_config.original_request_protocol) + proxy_config.original_request_protocol, + proxy_config.no_proxy_hosts) == std::tie( rhs.connection_group, rhs.target_host, @@ -706,7 +694,8 @@ struct EndpointPoolKey rhs.proxy_config.port, rhs.proxy_config.protocol, rhs.proxy_config.tunneling, - rhs.proxy_config.original_request_protocol); + rhs.proxy_config.original_request_protocol, + rhs.proxy_config.no_proxy_hosts); } }; diff --git a/src/Common/HilbertUtils.h b/src/Common/HilbertUtils.h new file mode 100644 index 00000000000..f0f8360de90 --- /dev/null +++ b/src/Common/HilbertUtils.h @@ -0,0 +1,161 @@ +#pragma once + +#include +#include +#include "base/types.h" +#include +#include +#include +#include + + +namespace HilbertDetails +{ + + struct Segment // represents [begin; end], all bounds are included + { + UInt64 begin; + UInt64 
end; + }; + +} + +/* + Given the range of values of hilbert code - and this function will return segments of the Hilbert curve + such that each of them lies in a whole domain (aka square) + 0 1 + ┌────────────────────────────────┐ + │ │ │ + │ │ │ + 0 │ 00xxx │ 11xxx │ + │ | │ | │ + │ | │ | │ + │_______________│________________│ + │ | │ | │ + │ | │ | │ + │ | │ | │ + 1 │ 01xxx______│_____10xxx │ + │ │ │ + │ │ │ + └────────────────────────────────┘ + Imagine a square, one side of which is a x-axis, other is a y-axis. + First approximation of the Hilbert curve is on the picture - U curve. + So we divide Hilbert Code Interval on 4 parts each of which is represented by a square + and look where the given interval [start, finish] is located: + [00xxxxxx | 01xxxxxx | 10xxxxxx | 11xxxxxx ] + 1: [ ] + start = 0010111 end = 10111110 + 2: [ ] [ ] + If it contains a whole sector (that represents a domain=square), + then we take this range. In the example above - it is a sector [01000000, 01111111] + Then we dig into the recursion and check the remaining ranges. + Note that after the first call all other ranges in the recursion will have either start or finish on the end of a range, + so the complexity of the algorithm will be O(logN), where N is the maximum of hilbert code. +*/ +template +void segmentBinaryPartition(UInt64 start, UInt64 finish, UInt8 current_bits, F && callback) +{ + if (current_bits == 0) + return; + + const auto next_bits = current_bits - 2; + const auto history = current_bits == 64 ? 
0 : (start >> current_bits) << current_bits; + + const auto chunk_mask = 0b11; + const auto start_chunk = (start >> next_bits) & chunk_mask; + const auto finish_chunk = (finish >> next_bits) & chunk_mask; + + auto construct_range = [next_bits, history](UInt64 chunk) + { + return HilbertDetails::Segment{ + .begin = history + (chunk << next_bits), + .end = history + ((chunk + 1) << next_bits) - 1 + }; + }; + + if (start_chunk == finish_chunk) + { + if ((finish - start + 1) == (1 << next_bits)) // it means that [begin, end] is a range + { + callback(HilbertDetails::Segment{.begin = start, .end = finish}); + return; + } + segmentBinaryPartition(start, finish, next_bits, callback); + return; + } + + for (auto range_chunk = start_chunk + 1; range_chunk < finish_chunk; ++range_chunk) + { + callback(construct_range(range_chunk)); + } + + const auto start_range = construct_range(start_chunk); + if (start == start_range.begin) + { + callback(start_range); + } + else + { + segmentBinaryPartition(start, start_range.end, next_bits, callback); + } + + const auto finish_range = construct_range(finish_chunk); + if (finish == finish_range.end) + { + callback(finish_range); + } + else + { + segmentBinaryPartition(finish_range.begin, finish, next_bits, callback); + } +} + +// Given 2 points representing ends of the range of Hilbert Curve that lies in a whole domain. +// The are neighbour corners of some square - and the function returns ranges of both sides of this square +inline std::array, 2> createRangeFromCorners(UInt64 x1, UInt64 y1, UInt64 x2, UInt64 y2) +{ + UInt64 dist_x = x1 > x2 ? x1 - x2 : x2 - x1; + UInt64 dist_y = y1 > y2 ? 
y1 - y2 : y2 - y1; + UInt64 range_size = std::max(dist_x, dist_y); + bool contains_minimum_vertice = x1 % (range_size + 1) == 0; + if (contains_minimum_vertice) + { + UInt64 x_min = std::min(x1, x2); + UInt64 y_min = std::min(y1, y2); + return { + std::pair{x_min, x_min + range_size}, + std::pair{y_min, y_min + range_size} + }; + } + else + { + UInt64 x_max = std::max(x1, x2); + UInt64 y_max = std::max(y1, y2); + chassert(x_max >= range_size); + chassert(y_max >= range_size); + return { + std::pair{x_max - range_size, x_max}, + std::pair{y_max - range_size, y_max} + }; + } +} + +/** Unpack an interval of Hilbert curve to hyperrectangles covered by it across N dimensions. + */ +template +void hilbertIntervalToHyperrectangles2D(UInt64 first, UInt64 last, F && callback) +{ + const auto equal_bits_count = getLeadingZeroBits(last | first); + const auto even_equal_bits_count = equal_bits_count - equal_bits_count % 2; + segmentBinaryPartition(first, last, 64 - even_equal_bits_count, [&](HilbertDetails::Segment range) + { + auto interval1 = DB::FunctionHilbertDecode2DWIthLookupTableImpl<3>::decode(range.begin); + auto interval2 = DB::FunctionHilbertDecode2DWIthLookupTableImpl<3>::decode(range.end); + + std::array, 2> unpacked_range = createRangeFromCorners( + std::get<0>(interval1), std::get<1>(interval1), + std::get<0>(interval2), std::get<1>(interval2)); + + callback(unpacked_range); + }); +} diff --git a/src/Common/ICachePolicy.h b/src/Common/ICachePolicy.h index 8aa75d1d81f..301a5c6cbbd 100644 --- a/src/Common/ICachePolicy.h +++ b/src/Common/ICachePolicy.h @@ -48,7 +48,7 @@ public: /// HashFunction usually hashes the entire key and the found key will be equal the provided key. In such cases, use get(). It is also /// possible to store other, non-hashed data in the key. In that case, the found key is potentially different from the provided key. - /// Then use getWithKey() to also return the found key including it's non-hashed data. 
+ /// Then use getWithKey() to also return the found key including its non-hashed data. virtual MappedPtr get(const Key & key) = 0; virtual std::optional getWithKey(const Key &) = 0; diff --git a/src/Common/MemoryTrackerSwitcher.h b/src/Common/MemoryTrackerSwitcher.h index 3c99fd12353..796b5295a83 100644 --- a/src/Common/MemoryTrackerSwitcher.h +++ b/src/Common/MemoryTrackerSwitcher.h @@ -15,6 +15,7 @@ struct MemoryTrackerSwitcher return; auto * thread_tracker = CurrentThread::getMemoryTracker(); + prev_untracked_memory = current_thread->untracked_memory; prev_memory_tracker_parent = thread_tracker->getParent(); @@ -31,8 +32,10 @@ struct MemoryTrackerSwitcher CurrentThread::flushUntrackedMemory(); auto * thread_tracker = CurrentThread::getMemoryTracker(); - current_thread->untracked_memory = prev_untracked_memory; + /// It is important to set untracked memory after the call of + /// 'setParent' because it may flush untracked memory to the wrong parent. thread_tracker->setParent(prev_memory_tracker_parent); + current_thread->untracked_memory = prev_untracked_memory; } private: diff --git a/src/Common/NamedCollections/NamedCollectionUtils.cpp b/src/Common/NamedCollections/NamedCollectionUtils.cpp deleted file mode 100644 index 5dbdeb10795..00000000000 --- a/src/Common/NamedCollections/NamedCollectionUtils.cpp +++ /dev/null @@ -1,484 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace fs = std::filesystem; - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NAMED_COLLECTION_ALREADY_EXISTS; - extern const int NAMED_COLLECTION_DOESNT_EXIST; - extern const int BAD_ARGUMENTS; -} - -namespace NamedCollectionUtils -{ - -static std::atomic is_loaded_from_config = false; -static std::atomic is_loaded_from_sql = false; - -class LoadFromConfig -{ -private: - const Poco::Util::AbstractConfiguration 
& config; - -public: - explicit LoadFromConfig(const Poco::Util::AbstractConfiguration & config_) - : config(config_) {} - - std::vector listCollections() const - { - Poco::Util::AbstractConfiguration::Keys collections_names; - config.keys(NAMED_COLLECTIONS_CONFIG_PREFIX, collections_names); - return collections_names; - } - - NamedCollectionsMap getAll() const - { - NamedCollectionsMap result; - for (const auto & collection_name : listCollections()) - { - if (result.contains(collection_name)) - { - throw Exception( - ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, - "Found duplicate named collection `{}`", - collection_name); - } - result.emplace(collection_name, get(collection_name)); - } - return result; - } - - MutableNamedCollectionPtr get(const std::string & collection_name) const - { - const auto collection_prefix = getCollectionPrefix(collection_name); - std::queue enumerate_input; - std::set> enumerate_result; - - enumerate_input.push(collection_prefix); - NamedCollectionConfiguration::listKeys(config, std::move(enumerate_input), enumerate_result, -1); - - /// Collection does not have any keys. - /// (`enumerate_result` == ). - const bool collection_is_empty = enumerate_result.size() == 1 - && *enumerate_result.begin() == collection_prefix; - std::set> keys; - if (!collection_is_empty) - { - /// Skip collection prefix and add +1 to avoid '.' in the beginning. 
- for (const auto & path : enumerate_result) - keys.emplace(path.substr(collection_prefix.size() + 1)); - } - - return NamedCollection::create( - config, collection_name, collection_prefix, keys, SourceId::CONFIG, /* is_mutable */false); - } - -private: - static constexpr auto NAMED_COLLECTIONS_CONFIG_PREFIX = "named_collections"; - - static std::string getCollectionPrefix(const std::string & collection_name) - { - return fmt::format("{}.{}", NAMED_COLLECTIONS_CONFIG_PREFIX, collection_name); - } -}; - - -class LoadFromSQL : private WithContext -{ -private: - const std::string metadata_path; - -public: - explicit LoadFromSQL(ContextPtr context_) - : WithContext(context_) - , metadata_path(fs::weakly_canonical(context_->getPath()) / NAMED_COLLECTIONS_METADATA_DIRECTORY) - { - if (fs::exists(metadata_path)) - cleanup(); - } - - std::vector listCollections() const - { - if (!fs::exists(metadata_path)) - return {}; - - std::vector collection_names; - fs::directory_iterator it{metadata_path}; - for (; it != fs::directory_iterator{}; ++it) - { - const auto & current_path = it->path(); - if (current_path.extension() == ".sql") - { - collection_names.push_back(it->path().stem()); - } - else - { - LOG_WARNING( - getLogger("NamedCollectionsLoadFromSQL"), - "Unexpected file {} in named collections directory", - current_path.filename().string()); - } - } - return collection_names; - } - - NamedCollectionsMap getAll() const - { - NamedCollectionsMap result; - for (const auto & collection_name : listCollections()) - { - if (result.contains(collection_name)) - { - throw Exception( - ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, - "Found duplicate named collection `{}`", - collection_name); - } - result.emplace(collection_name, get(collection_name)); - } - return result; - } - - MutableNamedCollectionPtr get(const std::string & collection_name) const - { - const auto query = readCreateQueryFromMetadata( - getMetadataPath(collection_name), - getContext()->getSettingsRef()); - return 
createNamedCollectionFromAST(query); - } - - MutableNamedCollectionPtr create(const ASTCreateNamedCollectionQuery & query) - { - writeCreateQueryToMetadata( - query, - getMetadataPath(query.collection_name), - getContext()->getSettingsRef()); - - return createNamedCollectionFromAST(query); - } - - void update(const ASTAlterNamedCollectionQuery & query) - { - const auto path = getMetadataPath(query.collection_name); - auto create_query = readCreateQueryFromMetadata(path, getContext()->getSettings()); - - std::unordered_map result_changes_map; - for (const auto & [name, value] : query.changes) - { - auto [it, inserted] = result_changes_map.emplace(name, value); - if (!inserted) - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Value with key `{}` is used twice in the SET query (collection name: {})", - name, query.collection_name); - } - } - - for (const auto & [name, value] : create_query.changes) - result_changes_map.emplace(name, value); - - std::unordered_map result_overridability_map; - for (const auto & [name, value] : query.overridability) - result_overridability_map.emplace(name, value); - for (const auto & [name, value] : create_query.overridability) - result_overridability_map.emplace(name, value); - - for (const auto & delete_key : query.delete_keys) - { - auto it = result_changes_map.find(delete_key); - if (it == result_changes_map.end()) - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Cannot delete key `{}` because it does not exist in collection", - delete_key); - } - else - { - result_changes_map.erase(it); - auto it_override = result_overridability_map.find(delete_key); - if (it_override != result_overridability_map.end()) - result_overridability_map.erase(it_override); - } - } - - create_query.changes.clear(); - for (const auto & [name, value] : result_changes_map) - create_query.changes.emplace_back(name, value); - create_query.overridability = std::move(result_overridability_map); - - if (create_query.changes.empty()) - throw Exception( 
- ErrorCodes::BAD_ARGUMENTS, - "Named collection cannot be empty (collection name: {})", - query.collection_name); - - writeCreateQueryToMetadata( - create_query, - getMetadataPath(query.collection_name), - getContext()->getSettingsRef(), - true); - } - - void remove(const std::string & collection_name) - { - auto collection_path = getMetadataPath(collection_name); - if (!fs::exists(collection_path)) - { - throw Exception( - ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST, - "Cannot remove collection `{}`, because it doesn't exist", - collection_name); - } - (void)fs::remove(collection_path); - } - -private: - static constexpr auto NAMED_COLLECTIONS_METADATA_DIRECTORY = "named_collections"; - - static MutableNamedCollectionPtr createNamedCollectionFromAST( - const ASTCreateNamedCollectionQuery & query) - { - const auto & collection_name = query.collection_name; - const auto config = NamedCollectionConfiguration::createConfiguration(collection_name, query.changes, query.overridability); - - std::set> keys; - for (const auto & [name, _] : query.changes) - keys.insert(name); - - return NamedCollection::create( - *config, collection_name, "", keys, SourceId::SQL, /* is_mutable */true); - } - - std::string getMetadataPath(const std::string & collection_name) const - { - return fs::path(metadata_path) / (escapeForFileName(collection_name) + ".sql"); - } - - /// Delete .tmp files. They could be left undeleted in case of - /// some exception or abrupt server restart. 
- void cleanup() - { - fs::directory_iterator it{metadata_path}; - std::vector files_to_remove; - for (; it != fs::directory_iterator{}; ++it) - { - const auto & current_path = it->path(); - if (current_path.extension() == ".tmp") - files_to_remove.push_back(current_path); - } - for (const auto & file : files_to_remove) - (void)fs::remove(file); - } - - static ASTCreateNamedCollectionQuery readCreateQueryFromMetadata( - const std::string & path, - const Settings & settings) - { - ReadBufferFromFile in(path); - std::string query; - readStringUntilEOF(query, in); - - ParserCreateNamedCollectionQuery parser; - auto ast = parseQuery(parser, query, "in file " + path, 0, settings.max_parser_depth, settings.max_parser_backtracks); - const auto & create_query = ast->as(); - return create_query; - } - - void writeCreateQueryToMetadata( - const ASTCreateNamedCollectionQuery & query, - const std::string & path, - const Settings & settings, - bool replace = false) const - { - if (!replace && fs::exists(path)) - { - throw Exception( - ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, - "Metadata file {} for named collection already exists", - path); - } - - fs::create_directories(metadata_path); - - auto tmp_path = path + ".tmp"; - String formatted_query = serializeAST(query); - WriteBufferFromFile out(tmp_path, formatted_query.size(), O_WRONLY | O_CREAT | O_EXCL); - writeString(formatted_query, out); - - out.next(); - if (settings.fsync_metadata) - out.sync(); - out.close(); - - fs::rename(tmp_path, path); - } -}; - -std::unique_lock lockNamedCollectionsTransaction() -{ - static std::mutex transaction_lock; - return std::unique_lock(transaction_lock); -} - -void loadFromConfigUnlocked(const Poco::Util::AbstractConfiguration & config, std::unique_lock &) -{ - auto named_collections = LoadFromConfig(config).getAll(); - LOG_TRACE( - getLogger("NamedCollectionsUtils"), - "Loaded {} collections from config", named_collections.size()); - - 
NamedCollectionFactory::instance().add(std::move(named_collections)); - is_loaded_from_config = true; -} - -void loadFromConfig(const Poco::Util::AbstractConfiguration & config) -{ - auto lock = lockNamedCollectionsTransaction(); - loadFromConfigUnlocked(config, lock); -} - -void reloadFromConfig(const Poco::Util::AbstractConfiguration & config) -{ - auto lock = lockNamedCollectionsTransaction(); - auto collections = LoadFromConfig(config).getAll(); - auto & instance = NamedCollectionFactory::instance(); - instance.removeById(SourceId::CONFIG); - instance.add(collections); - is_loaded_from_config = true; -} - -void loadFromSQLUnlocked(ContextPtr context, std::unique_lock &) -{ - auto named_collections = LoadFromSQL(context).getAll(); - LOG_TRACE( - getLogger("NamedCollectionsUtils"), - "Loaded {} collections from SQL", named_collections.size()); - - NamedCollectionFactory::instance().add(std::move(named_collections)); - is_loaded_from_sql = true; -} - -void loadFromSQL(ContextPtr context) -{ - auto lock = lockNamedCollectionsTransaction(); - loadFromSQLUnlocked(context, lock); -} - -void loadIfNotUnlocked(std::unique_lock & lock) -{ - auto global_context = Context::getGlobalContextInstance(); - if (!is_loaded_from_config) - loadFromConfigUnlocked(global_context->getConfigRef(), lock); - if (!is_loaded_from_sql) - loadFromSQLUnlocked(global_context, lock); -} - -void loadIfNot() -{ - if (is_loaded_from_sql && is_loaded_from_config) - return; - auto lock = lockNamedCollectionsTransaction(); - loadIfNotUnlocked(lock); -} - -void removeFromSQL(const ASTDropNamedCollectionQuery & query, ContextPtr context) -{ - auto lock = lockNamedCollectionsTransaction(); - loadIfNotUnlocked(lock); - auto & instance = NamedCollectionFactory::instance(); - if (!instance.exists(query.collection_name)) - { - if (!query.if_exists) - { - throw Exception( - ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST, - "Cannot remove collection `{}`, because it doesn't exist", - query.collection_name); - } - 
return; - } - LoadFromSQL(context).remove(query.collection_name); - instance.remove(query.collection_name); -} - -void createFromSQL(const ASTCreateNamedCollectionQuery & query, ContextPtr context) -{ - auto lock = lockNamedCollectionsTransaction(); - loadIfNotUnlocked(lock); - auto & instance = NamedCollectionFactory::instance(); - if (instance.exists(query.collection_name)) - { - if (!query.if_not_exists) - { - throw Exception( - ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, - "A named collection `{}` already exists", - query.collection_name); - } - return; - } - instance.add(query.collection_name, LoadFromSQL(context).create(query)); -} - -void updateFromSQL(const ASTAlterNamedCollectionQuery & query, ContextPtr context) -{ - auto lock = lockNamedCollectionsTransaction(); - loadIfNotUnlocked(lock); - auto & instance = NamedCollectionFactory::instance(); - if (!instance.exists(query.collection_name)) - { - if (!query.if_exists) - { - throw Exception( - ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST, - "Cannot remove collection `{}`, because it doesn't exist", - query.collection_name); - } - return; - } - LoadFromSQL(context).update(query); - - auto collection = instance.getMutable(query.collection_name); - auto collection_lock = collection->lock(); - - for (const auto & [name, value] : query.changes) - { - auto it_override = query.overridability.find(name); - if (it_override != query.overridability.end()) - collection->setOrUpdate(name, convertFieldToString(value), it_override->second); - else - collection->setOrUpdate(name, convertFieldToString(value), {}); - } - - for (const auto & key : query.delete_keys) - collection->remove(key); -} - -} - -} diff --git a/src/Common/NamedCollections/NamedCollectionUtils.h b/src/Common/NamedCollections/NamedCollectionUtils.h deleted file mode 100644 index 293b3ea659d..00000000000 --- a/src/Common/NamedCollections/NamedCollectionUtils.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once -#include - -namespace Poco { namespace Util { class 
AbstractConfiguration; } } - -namespace DB -{ - -class ASTCreateNamedCollectionQuery; -class ASTAlterNamedCollectionQuery; -class ASTDropNamedCollectionQuery; - -namespace NamedCollectionUtils -{ - -enum class SourceId : uint8_t -{ - NONE = 0, - CONFIG = 1, - SQL = 2, -}; - -void loadFromConfig(const Poco::Util::AbstractConfiguration & config); -void reloadFromConfig(const Poco::Util::AbstractConfiguration & config); - -/// Load named collections from `context->getPath() / named_collections /`. -void loadFromSQL(ContextPtr context); - -/// Remove collection as well as its metadata from `context->getPath() / named_collections /`. -void removeFromSQL(const ASTDropNamedCollectionQuery & query, ContextPtr context); - -/// Create a new collection from AST and put it to `context->getPath() / named_collections /`. -void createFromSQL(const ASTCreateNamedCollectionQuery & query, ContextPtr context); - -/// Update definition of already existing collection from AST and update result in `context->getPath() / named_collections /`. 
-void updateFromSQL(const ASTAlterNamedCollectionQuery & query, ContextPtr context); - -void loadIfNot(); - -} - -} diff --git a/src/Common/NamedCollections/NamedCollections.cpp b/src/Common/NamedCollections/NamedCollections.cpp index 04d2099f4df..74ce405f71d 100644 --- a/src/Common/NamedCollections/NamedCollections.cpp +++ b/src/Common/NamedCollections/NamedCollections.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include @@ -297,7 +296,7 @@ MutableNamedCollectionPtr NamedCollection::duplicate() const auto impl = pimpl->createCopy(collection_name); return std::unique_ptr( new NamedCollection( - std::move(impl), collection_name, NamedCollectionUtils::SourceId::NONE, true)); + std::move(impl), collection_name, SourceId::NONE, true)); } NamedCollection::Keys NamedCollection::getKeys(ssize_t depth, const std::string & prefix) const diff --git a/src/Common/NamedCollections/NamedCollections.h b/src/Common/NamedCollections/NamedCollections.h index c253c56594f..23862c4515a 100644 --- a/src/Common/NamedCollections/NamedCollections.h +++ b/src/Common/NamedCollections/NamedCollections.h @@ -1,7 +1,6 @@ #pragma once #include #include -#include namespace Poco { namespace Util { class AbstractConfiguration; } } @@ -23,7 +22,12 @@ class NamedCollection public: using Key = std::string; using Keys = std::set>; - using SourceId = NamedCollectionUtils::SourceId; + enum class SourceId : uint8_t + { + NONE = 0, + CONFIG = 1, + SQL = 2, + }; static MutableNamedCollectionPtr create( const Poco::Util::AbstractConfiguration & config, diff --git a/src/Common/NamedCollections/NamedCollectionsFactory.cpp b/src/Common/NamedCollections/NamedCollectionsFactory.cpp index dd69952429f..14105a8651d 100644 --- a/src/Common/NamedCollections/NamedCollectionsFactory.cpp +++ b/src/Common/NamedCollections/NamedCollectionsFactory.cpp @@ -1,5 +1,7 @@ #include -#include +#include +#include +#include namespace DB { @@ -17,23 +19,29 @@ NamedCollectionFactory & NamedCollectionFactory::instance() 
return instance; } +NamedCollectionFactory::~NamedCollectionFactory() +{ + shutdown(); +} + +void NamedCollectionFactory::shutdown() +{ + shutdown_called = true; + if (update_task) + update_task->deactivate(); + metadata_storage.reset(); +} + bool NamedCollectionFactory::exists(const std::string & collection_name) const { std::lock_guard lock(mutex); - return existsUnlocked(collection_name, lock); -} - -bool NamedCollectionFactory::existsUnlocked( - const std::string & collection_name, - std::lock_guard & /* lock */) const -{ - return loaded_named_collections.contains(collection_name); + return exists(collection_name, lock); } NamedCollectionPtr NamedCollectionFactory::get(const std::string & collection_name) const { std::lock_guard lock(mutex); - auto collection = tryGetUnlocked(collection_name, lock); + auto collection = tryGet(collection_name, lock); if (!collection) { throw Exception( @@ -47,14 +55,35 @@ NamedCollectionPtr NamedCollectionFactory::get(const std::string & collection_na NamedCollectionPtr NamedCollectionFactory::tryGet(const std::string & collection_name) const { std::lock_guard lock(mutex); - return tryGetUnlocked(collection_name, lock); + return tryGet(collection_name, lock); +} + +NamedCollectionsMap NamedCollectionFactory::getAll() const +{ + std::lock_guard lock(mutex); + return loaded_named_collections; +} + +bool NamedCollectionFactory::exists(const std::string & collection_name, std::lock_guard &) const +{ + return loaded_named_collections.contains(collection_name); +} + +MutableNamedCollectionPtr NamedCollectionFactory::tryGet( + const std::string & collection_name, + std::lock_guard &) const +{ + auto it = loaded_named_collections.find(collection_name); + if (it == loaded_named_collections.end()) + return nullptr; + return it->second; } MutableNamedCollectionPtr NamedCollectionFactory::getMutable( - const std::string & collection_name) const + const std::string & collection_name, + std::lock_guard & lock) const { - std::lock_guard 
lock(mutex); - auto collection = tryGetUnlocked(collection_name, lock); + auto collection = tryGet(collection_name, lock); if (!collection) { throw Exception( @@ -73,35 +102,10 @@ MutableNamedCollectionPtr NamedCollectionFactory::getMutable( return collection; } -MutableNamedCollectionPtr NamedCollectionFactory::tryGetUnlocked( - const std::string & collection_name, - std::lock_guard & /* lock */) const -{ - auto it = loaded_named_collections.find(collection_name); - if (it == loaded_named_collections.end()) - return nullptr; - return it->second; -} - void NamedCollectionFactory::add( - const std::string & collection_name, - MutableNamedCollectionPtr collection) -{ - std::lock_guard lock(mutex); - addUnlocked(collection_name, collection, lock); -} - -void NamedCollectionFactory::add(NamedCollectionsMap collections) -{ - std::lock_guard lock(mutex); - for (const auto & [collection_name, collection] : collections) - addUnlocked(collection_name, collection, lock); -} - -void NamedCollectionFactory::addUnlocked( const std::string & collection_name, MutableNamedCollectionPtr collection, - std::lock_guard & /* lock */) + std::lock_guard &) { auto [it, inserted] = loaded_named_collections.emplace(collection_name, collection); if (!inserted) @@ -113,10 +117,15 @@ void NamedCollectionFactory::addUnlocked( } } -void NamedCollectionFactory::remove(const std::string & collection_name) +void NamedCollectionFactory::add(NamedCollectionsMap collections, std::lock_guard & lock) { - std::lock_guard lock(mutex); - bool removed = removeIfExistsUnlocked(collection_name, lock); + for (const auto & [collection_name, collection] : collections) + add(collection_name, collection, lock); +} + +void NamedCollectionFactory::remove(const std::string & collection_name, std::lock_guard & lock) +{ + bool removed = removeIfExists(collection_name, lock); if (!removed) { throw Exception( @@ -126,17 +135,11 @@ void NamedCollectionFactory::remove(const std::string & collection_name) } } -void 
NamedCollectionFactory::removeIfExists(const std::string & collection_name) -{ - std::lock_guard lock(mutex); - removeIfExistsUnlocked(collection_name, lock); // NOLINT -} - -bool NamedCollectionFactory::removeIfExistsUnlocked( +bool NamedCollectionFactory::removeIfExists( const std::string & collection_name, std::lock_guard & lock) { - auto collection = tryGetUnlocked(collection_name, lock); + auto collection = tryGet(collection_name, lock); if (!collection) return false; @@ -152,18 +155,246 @@ bool NamedCollectionFactory::removeIfExistsUnlocked( return true; } -void NamedCollectionFactory::removeById(NamedCollectionUtils::SourceId id) +void NamedCollectionFactory::removeById(NamedCollection::SourceId id, std::lock_guard &) { - std::lock_guard lock(mutex); std::erase_if( loaded_named_collections, [&](const auto & value) { return value.second->getSourceId() == id; }); } -NamedCollectionsMap NamedCollectionFactory::getAll() const +namespace +{ + constexpr auto NAMED_COLLECTIONS_CONFIG_PREFIX = "named_collections"; + + std::vector listCollections(const Poco::Util::AbstractConfiguration & config) + { + Poco::Util::AbstractConfiguration::Keys collections_names; + config.keys(NAMED_COLLECTIONS_CONFIG_PREFIX, collections_names); + return collections_names; + } + + MutableNamedCollectionPtr getCollection( + const Poco::Util::AbstractConfiguration & config, + const std::string & collection_name) + { + const auto collection_prefix = fmt::format("{}.{}", NAMED_COLLECTIONS_CONFIG_PREFIX, collection_name); + std::queue enumerate_input; + std::set> enumerate_result; + + enumerate_input.push(collection_prefix); + NamedCollectionConfiguration::listKeys(config, std::move(enumerate_input), enumerate_result, -1); + + /// Collection does not have any keys. (`enumerate_result` == ). 
+ const bool collection_is_empty = enumerate_result.size() == 1 + && *enumerate_result.begin() == collection_prefix; + + std::set> keys; + if (!collection_is_empty) + { + /// Skip collection prefix and add +1 to avoid '.' in the beginning. + for (const auto & path : enumerate_result) + keys.emplace(path.substr(collection_prefix.size() + 1)); + } + + return NamedCollection::create( + config, collection_name, collection_prefix, keys, NamedCollection::SourceId::CONFIG, /* is_mutable */false); + } + + NamedCollectionsMap getNamedCollections(const Poco::Util::AbstractConfiguration & config) + { + NamedCollectionsMap result; + for (const auto & collection_name : listCollections(config)) + { + if (result.contains(collection_name)) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, + "Found duplicate named collection `{}`", + collection_name); + } + result.emplace(collection_name, getCollection(config, collection_name)); + } + return result; + } +} + +void NamedCollectionFactory::loadIfNot() { std::lock_guard lock(mutex); - return loaded_named_collections; + loadIfNot(lock); +} + +bool NamedCollectionFactory::loadIfNot(std::lock_guard & lock) +{ + if (loaded) + return false; + + auto context = Context::getGlobalContextInstance(); + metadata_storage = NamedCollectionsMetadataStorage::create(context); + + loadFromConfig(context->getConfigRef(), lock); + loadFromSQL(lock); + + if (metadata_storage->supportsPeriodicUpdate()) + { + update_task = context->getSchedulePool().createTask("NamedCollectionsMetadataStorage", [this]{ updateFunc(); }); + update_task->activate(); + update_task->schedule(); + } + + loaded = true; + return true; +} + +void NamedCollectionFactory::loadFromConfig(const Poco::Util::AbstractConfiguration & config, std::lock_guard & lock) +{ + auto collections = getNamedCollections(config); + LOG_TEST(log, "Loaded {} collections from config", collections.size()); + add(std::move(collections), lock); +} + +void 
NamedCollectionFactory::reloadFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + std::lock_guard lock(mutex); + if (loadIfNot(lock)) + return; + + auto collections = getNamedCollections(config); + LOG_TEST(log, "Loaded {} collections from config", collections.size()); + + removeById(NamedCollection::SourceId::CONFIG, lock); + add(std::move(collections), lock); +} + +void NamedCollectionFactory::loadFromSQL(std::lock_guard & lock) +{ + auto collections = metadata_storage->getAll(); + LOG_TEST(log, "Loaded {} collections from sql", collections.size()); + add(std::move(collections), lock); +} + +void NamedCollectionFactory::createFromSQL(const ASTCreateNamedCollectionQuery & query) +{ + std::lock_guard lock(mutex); + loadIfNot(lock); + + if (exists(query.collection_name, lock)) + { + if (query.if_not_exists) + return; + + throw Exception( + ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, + "A named collection `{}` already exists", + query.collection_name); + } + + add(query.collection_name, metadata_storage->create(query), lock); +} + +void NamedCollectionFactory::removeFromSQL(const ASTDropNamedCollectionQuery & query) +{ + std::lock_guard lock(mutex); + loadIfNot(lock); + + if (!exists(query.collection_name, lock)) + { + if (query.if_exists) + return; + + throw Exception( + ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST, + "Cannot remove collection `{}`, because it doesn't exist", + query.collection_name); + } + + metadata_storage->remove(query.collection_name); + remove(query.collection_name, lock); +} + +void NamedCollectionFactory::updateFromSQL(const ASTAlterNamedCollectionQuery & query) +{ + std::lock_guard lock(mutex); + loadIfNot(lock); + + if (!exists(query.collection_name, lock)) + { + if (query.if_exists) + return; + + throw Exception( + ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST, + "Cannot remove collection `{}`, because it doesn't exist", + query.collection_name); + } + + metadata_storage->update(query); + + auto collection = 
getMutable(query.collection_name, lock); + auto collection_lock = collection->lock(); + + for (const auto & [name, value] : query.changes) + { + auto it_override = query.overridability.find(name); + if (it_override != query.overridability.end()) + collection->setOrUpdate(name, convertFieldToString(value), it_override->second); + else + collection->setOrUpdate(name, convertFieldToString(value), {}); + } + + for (const auto & key : query.delete_keys) + collection->remove(key); +} + +void NamedCollectionFactory::reloadFromSQL() +{ + std::lock_guard lock(mutex); + if (loadIfNot(lock)) + return; + + auto collections = metadata_storage->getAll(); + removeById(NamedCollection::SourceId::SQL, lock); + add(std::move(collections), lock); +} + +void NamedCollectionFactory::updateFunc() +{ + LOG_TRACE(log, "Named collections background updating thread started"); + + while (!shutdown_called.load()) + { + if (metadata_storage->waitUpdate()) + { + try + { + reloadFromSQL(); + } + catch (const Coordination::Exception & e) + { + if (Coordination::isHardwareError(e.code)) + { + LOG_INFO(log, "Lost ZooKeeper connection, will try to connect again: {}", + DB::getCurrentExceptionMessage(true)); + + sleepForSeconds(1); + } + else + { + tryLogCurrentException(__PRETTY_FUNCTION__); + chassert(false); + } + continue; + } + catch (...) 
+ { + DB::tryLogCurrentException(__PRETTY_FUNCTION__); + chassert(false); + continue; + } + } + } + + LOG_TRACE(log, "Named collections background updating thread finished"); } } diff --git a/src/Common/NamedCollections/NamedCollectionsFactory.h b/src/Common/NamedCollections/NamedCollectionsFactory.h index 2d64a03bde3..6ee5940e686 100644 --- a/src/Common/NamedCollections/NamedCollectionsFactory.h +++ b/src/Common/NamedCollections/NamedCollectionsFactory.h @@ -1,58 +1,83 @@ #pragma once #include +#include +#include namespace DB { +class ASTCreateNamedCollectionQuery; +class ASTDropNamedCollectionQuery; +class ASTAlterNamedCollectionQuery; class NamedCollectionFactory : boost::noncopyable { public: static NamedCollectionFactory & instance(); + ~NamedCollectionFactory(); + bool exists(const std::string & collection_name) const; NamedCollectionPtr get(const std::string & collection_name) const; NamedCollectionPtr tryGet(const std::string & collection_name) const; - MutableNamedCollectionPtr getMutable(const std::string & collection_name) const; - - void add(const std::string & collection_name, MutableNamedCollectionPtr collection); - - void add(NamedCollectionsMap collections); - - void update(NamedCollectionsMap collections); - - void remove(const std::string & collection_name); - - void removeIfExists(const std::string & collection_name); - - void removeById(NamedCollectionUtils::SourceId id); - NamedCollectionsMap getAll() const; -private: - bool existsUnlocked( - const std::string & collection_name, - std::lock_guard & lock) const; + void reloadFromConfig(const Poco::Util::AbstractConfiguration & config); - MutableNamedCollectionPtr tryGetUnlocked( - const std::string & collection_name, - std::lock_guard & lock) const; + void reloadFromSQL(); - void addUnlocked( - const std::string & collection_name, - MutableNamedCollectionPtr collection, - std::lock_guard & lock); + void createFromSQL(const ASTCreateNamedCollectionQuery & query); - bool removeIfExistsUnlocked( - 
const std::string & collection_name, - std::lock_guard & lock); + void removeFromSQL(const ASTDropNamedCollectionQuery & query); + void updateFromSQL(const ASTAlterNamedCollectionQuery & query); + + void loadIfNot(); + + void shutdown(); + +protected: mutable NamedCollectionsMap loaded_named_collections; - mutable std::mutex mutex; - bool is_initialized = false; + + const LoggerPtr log = getLogger("NamedCollectionFactory"); + + bool loaded = false; + std::atomic shutdown_called = false; + std::unique_ptr metadata_storage; + BackgroundSchedulePool::TaskHolder update_task; + + bool loadIfNot(std::lock_guard & lock); + + bool exists( + const std::string & collection_name, + std::lock_guard & lock) const; + + MutableNamedCollectionPtr getMutable(const std::string & collection_name, std::lock_guard & lock) const; + + void add(const std::string & collection_name, MutableNamedCollectionPtr collection, std::lock_guard & lock); + + void add(NamedCollectionsMap collections, std::lock_guard & lock); + + void update(NamedCollectionsMap collections, std::lock_guard & lock); + + void remove(const std::string & collection_name, std::lock_guard & lock); + + bool removeIfExists(const std::string & collection_name, std::lock_guard & lock); + + MutableNamedCollectionPtr tryGet(const std::string & collection_name, std::lock_guard & lock) const; + + void removeById(NamedCollection::SourceId id, std::lock_guard & lock); + + void loadFromConfig( + const Poco::Util::AbstractConfiguration & config, + std::lock_guard & lock); + + void loadFromSQL(std::lock_guard & lock); + + void updateFunc(); }; } diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp new file mode 100644 index 00000000000..32fdb25abd3 --- /dev/null +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp @@ -0,0 +1,519 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NAMED_COLLECTION_ALREADY_EXISTS; + extern const int NAMED_COLLECTION_DOESNT_EXIST; + extern const int INVALID_CONFIG_PARAMETER; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + +static const std::string named_collections_storage_config_path = "named_collections_storage"; + +namespace +{ + MutableNamedCollectionPtr createNamedCollectionFromAST(const ASTCreateNamedCollectionQuery & query) + { + const auto & collection_name = query.collection_name; + const auto config = NamedCollectionConfiguration::createConfiguration(collection_name, query.changes, query.overridability); + + std::set> keys; + for (const auto & [name, _] : query.changes) + keys.insert(name); + + return NamedCollection::create( + *config, collection_name, "", keys, NamedCollection::SourceId::SQL, /* is_mutable */true); + } + + std::string getFileName(const std::string & collection_name) + { + return escapeForFileName(collection_name) + ".sql"; + } +} + +class NamedCollectionsMetadataStorage::INamedCollectionsStorage +{ +public: + virtual ~INamedCollectionsStorage() = default; + + virtual bool exists(const std::string & path) const = 0; + + virtual std::vector list() const = 0; + + virtual std::string read(const std::string & path) const = 0; + + virtual void write(const std::string & path, const std::string & data, bool replace) = 0; + + virtual void remove(const std::string & path) = 0; + + virtual bool removeIfExists(const std::string & path) = 0; + + virtual bool supportsPeriodicUpdate() const = 0; + + virtual bool waitUpdate(size_t /* timeout */) { return false; } +}; + + +class NamedCollectionsMetadataStorage::LocalStorage : public INamedCollectionsStorage, private WithContext +{ +private: + std::string root_path; + +public: + LocalStorage(ContextPtr context_, const std::string & path_) + : WithContext(context_) + , 
root_path(path_) + { + if (fs::exists(root_path)) + cleanup(); + } + + ~LocalStorage() override = default; + + bool supportsPeriodicUpdate() const override { return false; } + + std::vector list() const override + { + if (!fs::exists(root_path)) + return {}; + + std::vector elements; + for (fs::directory_iterator it{root_path}; it != fs::directory_iterator{}; ++it) + { + const auto & current_path = it->path(); + if (current_path.extension() == ".sql") + { + elements.push_back(it->path()); + } + else + { + LOG_WARNING( + getLogger("LocalStorage"), + "Unexpected file {} in named collections directory", + current_path.filename().string()); + } + } + return elements; + } + + bool exists(const std::string & path) const override + { + return fs::exists(getPath(path)); + } + + std::string read(const std::string & path) const override + { + ReadBufferFromFile in(getPath(path)); + std::string data; + readStringUntilEOF(data, in); + return data; + } + + void write(const std::string & path, const std::string & data, bool replace) override + { + if (!replace && fs::exists(path)) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, + "Metadata file {} for named collection already exists", + path); + } + + fs::create_directories(root_path); + + auto tmp_path = getPath(path + ".tmp"); + WriteBufferFromFile out(tmp_path, data.size(), O_WRONLY | O_CREAT | O_EXCL); + writeString(data, out); + + out.next(); + if (getContext()->getSettingsRef().fsync_metadata) + out.sync(); + out.close(); + + fs::rename(tmp_path, getPath(path)); + } + + void remove(const std::string & path) override + { + if (!removeIfExists(getPath(path))) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST, + "Cannot remove `{}`, because it doesn't exist", path); + } + } + + bool removeIfExists(const std::string & path) override + { + return fs::remove(getPath(path)); + } + +private: + std::string getPath(const std::string & path) const + { + return fs::path(root_path) / path; + } + + 
/// Delete .tmp files. They could be left undeleted in case of + /// some exception or abrupt server restart. + void cleanup() + { + std::vector files_to_remove; + for (fs::directory_iterator it{root_path}; it != fs::directory_iterator{}; ++it) + { + const auto & current_path = it->path(); + if (current_path.extension() == ".tmp") + files_to_remove.push_back(current_path); + } + for (const auto & file : files_to_remove) + fs::remove(file); + } +}; + + +class NamedCollectionsMetadataStorage::ZooKeeperStorage : public INamedCollectionsStorage, private WithContext +{ +private: + std::string root_path; + mutable zkutil::ZooKeeperPtr zookeeper_client{nullptr}; + mutable zkutil::EventPtr wait_event; + mutable Int32 collections_node_cversion = 0; + +public: + ZooKeeperStorage(ContextPtr context_, const std::string & path_) + : WithContext(context_) + , root_path(path_) + { + if (root_path.empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Collections path cannot be empty"); + + if (root_path != "/" && root_path.back() == '/') + root_path.resize(root_path.size() - 1); + if (root_path.front() != '/') + root_path = "/" + root_path; + + auto client = getClient(); + if (root_path != "/" && !client->exists(root_path)) + { + client->createAncestors(root_path); + client->createIfNotExists(root_path, ""); + } + } + + ~ZooKeeperStorage() override = default; + + bool supportsPeriodicUpdate() const override { return true; } + + /// Return true if children changed. + bool waitUpdate(size_t timeout) override + { + if (!wait_event) + { + /// We did not yet made any list() attempt, so do that. + return true; + } + + if (wait_event->tryWait(timeout)) + { + /// Children changed before timeout. + return true; + } + + std::string res; + Coordination::Stat stat; + + if (!getClient()->tryGet(root_path, res, &stat)) + { + /// We do create root_path in constructor of this class, + /// so this case is not really possible. 
+ chassert(false); + return false; + } + + return stat.cversion != collections_node_cversion; + } + + std::vector list() const override + { + if (!wait_event) + wait_event = std::make_shared(); + + Coordination::Stat stat; + auto children = getClient()->getChildren(root_path, &stat, wait_event); + collections_node_cversion = stat.cversion; + return children; + } + + bool exists(const std::string & path) const override + { + return getClient()->exists(getPath(path)); + } + + std::string read(const std::string & path) const override + { + return getClient()->get(getPath(path)); + } + + void write(const std::string & path, const std::string & data, bool replace) override + { + if (replace) + { + getClient()->createOrUpdate(getPath(path), data, zkutil::CreateMode::Persistent); + } + else + { + auto code = getClient()->tryCreate(getPath(path), data, zkutil::CreateMode::Persistent); + + if (code == Coordination::Error::ZNODEEXISTS) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, + "Metadata file {} for named collection already exists", + path); + } + } + } + + void remove(const std::string & path) override + { + getClient()->remove(getPath(path)); + } + + bool removeIfExists(const std::string & path) override + { + auto code = getClient()->tryRemove(getPath(path)); + if (code == Coordination::Error::ZOK) + return true; + if (code == Coordination::Error::ZNONODE) + return false; + throw Coordination::Exception::fromPath(code, getPath(path)); + } + +private: + zkutil::ZooKeeperPtr getClient() const + { + if (!zookeeper_client || zookeeper_client->expired()) + { + zookeeper_client = getContext()->getZooKeeper(); + zookeeper_client->sync(root_path); + } + return zookeeper_client; + } + + std::string getPath(const std::string & path) const + { + return fs::path(root_path) / path; + } +}; + +NamedCollectionsMetadataStorage::NamedCollectionsMetadataStorage( + std::shared_ptr storage_, + ContextPtr context_) + : WithContext(context_) + , 
storage(std::move(storage_)) +{ +} + +MutableNamedCollectionPtr NamedCollectionsMetadataStorage::get(const std::string & collection_name) const +{ + const auto query = readCreateQuery(collection_name); + return createNamedCollectionFromAST(query); +} + +NamedCollectionsMap NamedCollectionsMetadataStorage::getAll() const +{ + NamedCollectionsMap result; + for (const auto & collection_name : listCollections()) + { + if (result.contains(collection_name)) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, + "Found duplicate named collection `{}`", + collection_name); + } + result.emplace(collection_name, get(collection_name)); + } + return result; +} + +MutableNamedCollectionPtr NamedCollectionsMetadataStorage::create(const ASTCreateNamedCollectionQuery & query) +{ + writeCreateQuery(query); + return createNamedCollectionFromAST(query); +} + +void NamedCollectionsMetadataStorage::remove(const std::string & collection_name) +{ + storage->remove(getFileName(collection_name)); +} + +bool NamedCollectionsMetadataStorage::removeIfExists(const std::string & collection_name) +{ + return storage->removeIfExists(getFileName(collection_name)); +} + +void NamedCollectionsMetadataStorage::update(const ASTAlterNamedCollectionQuery & query) +{ + auto create_query = readCreateQuery(query.collection_name); + + std::unordered_map result_changes_map; + for (const auto & [name, value] : query.changes) + { + auto [it, inserted] = result_changes_map.emplace(name, value); + if (!inserted) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Value with key `{}` is used twice in the SET query (collection name: {})", + name, query.collection_name); + } + } + + for (const auto & [name, value] : create_query.changes) + result_changes_map.emplace(name, value); + + std::unordered_map result_overridability_map; + for (const auto & [name, value] : query.overridability) + result_overridability_map.emplace(name, value); + for (const auto & [name, value] : 
create_query.overridability) + result_overridability_map.emplace(name, value); + + for (const auto & delete_key : query.delete_keys) + { + auto it = result_changes_map.find(delete_key); + if (it == result_changes_map.end()) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot delete key `{}` because it does not exist in collection", + delete_key); + } + else + { + result_changes_map.erase(it); + auto it_override = result_overridability_map.find(delete_key); + if (it_override != result_overridability_map.end()) + result_overridability_map.erase(it_override); + } + } + + create_query.changes.clear(); + for (const auto & [name, value] : result_changes_map) + create_query.changes.emplace_back(name, value); + create_query.overridability = std::move(result_overridability_map); + + if (create_query.changes.empty()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Named collection cannot be empty (collection name: {})", + query.collection_name); + + chassert(create_query.collection_name == query.collection_name); + writeCreateQuery(create_query, true); +} + +std::vector NamedCollectionsMetadataStorage::listCollections() const +{ + auto paths = storage->list(); + std::vector collections; + collections.reserve(paths.size()); + for (const auto & path : paths) + collections.push_back(std::filesystem::path(path).stem()); + return collections; +} + +ASTCreateNamedCollectionQuery NamedCollectionsMetadataStorage::readCreateQuery(const std::string & collection_name) const +{ + const auto path = getFileName(collection_name); + auto query = storage->read(path); + const auto & settings = getContext()->getSettingsRef(); + + ParserCreateNamedCollectionQuery parser; + auto ast = parseQuery(parser, query, "in file " + path, 0, settings.max_parser_depth, settings.max_parser_backtracks); + const auto & create_query = ast->as(); + return create_query; +} + +void NamedCollectionsMetadataStorage::writeCreateQuery(const ASTCreateNamedCollectionQuery & query, bool replace) +{ + auto 
normalized_query = query.clone(); + auto & changes = typeid_cast(normalized_query.get())->changes; + ::sort( + changes.begin(), changes.end(), + [](const SettingChange & lhs, const SettingChange & rhs) { return lhs.name < rhs.name; }); + + storage->write(getFileName(query.collection_name), serializeAST(*normalized_query), replace); +} + +bool NamedCollectionsMetadataStorage::supportsPeriodicUpdate() const +{ + return storage->supportsPeriodicUpdate(); +} + +bool NamedCollectionsMetadataStorage::waitUpdate() +{ + if (!storage->supportsPeriodicUpdate()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Periodic updates are not supported"); + + const auto & config = Context::getGlobalContextInstance()->getConfigRef(); + const size_t timeout = config.getUInt(named_collections_storage_config_path + ".update_timeout_ms", 5000); + + return storage->waitUpdate(timeout); +} + +std::unique_ptr NamedCollectionsMetadataStorage::create(const ContextPtr & context_) +{ + const auto & config = context_->getConfigRef(); + const auto storage_type = config.getString(named_collections_storage_config_path + ".type", "local"); + + if (storage_type == "local") + { + const auto path = config.getString( + named_collections_storage_config_path + ".path", + std::filesystem::path(context_->getPath()) / "named_collections"); + + LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"), + "Using local storage for named collections at path: {}", path); + + auto local_storage = std::make_unique(context_, path); + return std::unique_ptr( + new NamedCollectionsMetadataStorage(std::move(local_storage), context_)); + } + if (storage_type == "zookeeper" || storage_type == "keeper") + { + const auto path = config.getString(named_collections_storage_config_path + ".path"); + auto zk_storage = std::make_unique(context_, path); + + LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"), + "Using zookeeper storage for named collections at path: {}", path); + + return std::unique_ptr( + new 
NamedCollectionsMetadataStorage(std::move(zk_storage), context_)); + } + + throw Exception( + ErrorCodes::INVALID_CONFIG_PARAMETER, + "Unknown storage for named collections: {}", storage_type); +} + +} diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h new file mode 100644 index 00000000000..3c089fe2fa2 --- /dev/null +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h @@ -0,0 +1,52 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace DB +{ + +class NamedCollectionsMetadataStorage : private WithContext +{ +public: + static std::unique_ptr create(const ContextPtr & context); + + NamedCollectionsMap getAll() const; + + MutableNamedCollectionPtr get(const std::string & collection_name) const; + + MutableNamedCollectionPtr create(const ASTCreateNamedCollectionQuery & query); + + void remove(const std::string & collection_name); + + bool removeIfExists(const std::string & collection_name); + + void update(const ASTAlterNamedCollectionQuery & query); + + void shutdown(); + + /// Return true if update was made + bool waitUpdate(); + + bool supportsPeriodicUpdate() const; + +private: + class INamedCollectionsStorage; + class LocalStorage; + class ZooKeeperStorage; + + std::shared_ptr storage; + + NamedCollectionsMetadataStorage(std::shared_ptr storage_, ContextPtr context_); + + std::vector listCollections() const; + + ASTCreateNamedCollectionQuery readCreateQuery(const std::string & collection_name) const; + + void writeCreateQuery(const ASTCreateNamedCollectionQuery & query, bool replace = false); +}; + + +} diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index ece5114a998..92ef0597c7e 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -1,17 +1,20 @@ #pragma once -#include -#include -#include -#include +#include "config.h" + #include #include +#include +#include +#include +#include +#include + +#include +#include 
+#include #include #include -#include -#include -#include -#include #ifndef NDEBUG #include @@ -112,6 +115,11 @@ protected: template void alloc(size_t bytes, TAllocatorParams &&... allocator_params) { +#if USE_GWP_ASAN + if (unlikely(GWPAsan::shouldForceSample())) + gwp_asan::getThreadLocals()->NextSampleCounter = 1; +#endif + char * allocated = reinterpret_cast(TAllocator::alloc(bytes, std::forward(allocator_params)...)); c_start = allocated + pad_left; @@ -141,6 +149,11 @@ protected: return; } +#if USE_GWP_ASAN + if (unlikely(GWPAsan::shouldForceSample())) + gwp_asan::getThreadLocals()->NextSampleCounter = 1; +#endif + unprotect(); ptrdiff_t end_diff = c_end - c_start; diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index f73e16c517d..52db7b8ce2e 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -3,6 +3,7 @@ #include +// clang-format off /// Available events. Add something here as you wish. /// If the event is generic (i.e. not server specific) /// it should be also added to src/Coordination/KeeperConstant.cpp @@ -14,6 +15,7 @@ M(QueriesWithSubqueries, "Count queries with all subqueries") \ M(SelectQueriesWithSubqueries, "Count SELECT queries with all subqueries") \ M(InsertQueriesWithSubqueries, "Count INSERT queries with all subqueries") \ + M(SelectQueriesWithPrimaryKeyUsage, "Count SELECT queries which use the primary key to evaluate the WHERE condition") \ M(AsyncInsertQuery, "Same as InsertQuery, but only for asynchronous INSERT queries.") \ M(AsyncInsertBytes, "Data size in bytes of asynchronous INSERT queries.") \ M(AsyncInsertRows, "Number of rows inserted by asynchronous INSERT queries.") \ @@ -445,14 +447,18 @@ The server successfully detected this situation and will download merged part fr M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \ \ M(AzureGetObject, "Number of Azure API GetObject calls.") \ - M(AzureUploadPart, "Number of Azure blob storage API 
UploadPart calls") \ + M(AzureUpload, "Number of Azure blob storage API Upload calls") \ + M(AzureStageBlock, "Number of Azure blob storage API StageBlock calls") \ + M(AzureCommitBlockList, "Number of Azure blob storage API CommitBlockList calls") \ M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls") \ M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.") \ M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.") \ \ M(DiskAzureGetObject, "Number of Disk Azure API GetObject calls.") \ - M(DiskAzureUploadPart, "Number of Disk Azure blob storage API UploadPart calls") \ + M(DiskAzureUpload, "Number of Disk Azure blob storage API Upload calls") \ + M(DiskAzureStageBlock, "Number of Disk Azure blob storage API StageBlock calls") \ + M(DiskAzureCommitBlockList, "Number of Disk Azure blob storage API CommitBlockList calls") \ M(DiskAzureCopyObject, "Number of Disk Azure blob storage API CopyObject calls") \ M(DiskAzureListObjects, "Number of Disk Azure blob storage API ListObjects calls.") \ M(DiskAzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.") \ @@ -609,6 +615,13 @@ The server successfully detected this situation and will download merged part fr M(KeeperPacketsReceived, "Packets received by keeper server") \ M(KeeperRequestTotal, "Total requests number on keeper server") \ M(KeeperLatency, "Keeper latency") \ + M(KeeperTotalElapsedMicroseconds, "Keeper total latency for a single request") \ + M(KeeperProcessElapsedMicroseconds, "Keeper commit latency for a single request") \ + M(KeeperPreprocessElapsedMicroseconds, "Keeper preprocessing latency for a single reuquest") \ + M(KeeperStorageLockWaitMicroseconds, "Time spent waiting for acquiring Keeper storage lock") \ + M(KeeperCommitWaitElapsedMicroseconds, "Time spent waiting for certain log to be committed") \ + M(KeeperBatchMaxCount, "Number of 
times the size of batch was limited by the amount") \ + M(KeeperBatchMaxTotalSize, "Number of times the size of batch was limited by the total bytes size") \ M(KeeperCommits, "Number of successful commits") \ M(KeeperCommitsFailed, "Number of failed commits") \ M(KeeperSnapshotCreations, "Number of snapshots creations")\ @@ -635,15 +648,16 @@ The server successfully detected this situation and will download merged part fr M(S3QueueSetFileProcessingMicroseconds, "Time spent to set file as processing")\ M(S3QueueSetFileProcessedMicroseconds, "Time spent to set file as processed")\ M(S3QueueSetFileFailedMicroseconds, "Time spent to set file as failed")\ - M(S3QueueFailedFiles, "Number of files which failed to be processed")\ - M(S3QueueProcessedFiles, "Number of files which were processed")\ - M(S3QueueCleanupMaxSetSizeOrTTLMicroseconds, "Time spent to set file as failed")\ - M(S3QueuePullMicroseconds, "Time spent to read file data")\ - M(S3QueueLockLocalFileStatusesMicroseconds, "Time spent to lock local file statuses")\ + M(ObjectStorageQueueFailedFiles, "Number of files which failed to be processed")\ + M(ObjectStorageQueueProcessedFiles, "Number of files which were processed")\ + M(ObjectStorageQueueCleanupMaxSetSizeOrTTLMicroseconds, "Time spent to set file as failed")\ + M(ObjectStorageQueuePullMicroseconds, "Time spent to read file data")\ + M(ObjectStorageQueueLockLocalFileStatusesMicroseconds, "Time spent to lock local file statuses")\ \ M(ServerStartupMilliseconds, "Time elapsed from starting server to listening to sockets in milliseconds")\ M(IOUringSQEsSubmitted, "Total number of io_uring SQEs submitted") \ - M(IOUringSQEsResubmits, "Total number of io_uring SQE resubmits performed") \ + M(IOUringSQEsResubmitsAsync, "Total number of asynchronous io_uring SQE resubmits performed") \ + M(IOUringSQEsResubmitsSync, "Total number of synchronous io_uring SQE resubmits performed") \ M(IOUringCQEsCompleted, "Total number of successfully completed io_uring CQEs") \ 
M(IOUringCQEsFailed, "Total number of completed io_uring CQEs with failures") \ \ @@ -753,6 +767,10 @@ The server successfully detected this situation and will download merged part fr \ M(ReadWriteBufferFromHTTPRequestsSent, "Number of HTTP requests sent by ReadWriteBufferFromHTTP") \ M(ReadWriteBufferFromHTTPBytes, "Total size of payload bytes received and sent by ReadWriteBufferFromHTTP. Doesn't include HTTP headers.") \ + \ + M(GWPAsanAllocateSuccess, "Number of successful allocations done by GWPAsan") \ + M(GWPAsanAllocateFailed, "Number of failed allocations done by GWPAsan (i.e. filled pool)") \ + M(GWPAsanFree, "Number of free operations done by GWPAsan") \ #ifdef APPLY_FOR_EXTERNAL_EVENTS diff --git a/src/Common/ProfileEvents.h b/src/Common/ProfileEvents.h index e670b8907d2..f196ed5a04c 100644 --- a/src/Common/ProfileEvents.h +++ b/src/Common/ProfileEvents.h @@ -40,6 +40,7 @@ namespace ProfileEvents Timer(Counters & counters_, Event timer_event_, Event counter_event, Resolution resolution_); ~Timer() { end(); } void cancel() { watch.reset(); } + void restart() { watch.restart(); } void end(); UInt64 get(); diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index 7b07c72824a..0b482cb09be 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -92,19 +92,19 @@ void ProgressIndication::writeFinalProgress() if (progress.read_rows < 1000) return; - std::cout << "Processed " << formatReadableQuantity(progress.read_rows) << " rows, " + output_stream << "Processed " << formatReadableQuantity(progress.read_rows) << " rows, " << formatReadableSizeWithDecimalSuffix(progress.read_bytes); UInt64 elapsed_ns = getElapsedNanoseconds(); if (elapsed_ns) - std::cout << " (" << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., " + output_stream << " (" << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., " << 
formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)"; else - std::cout << ". "; + output_stream << ". "; auto peak_memory_usage = getMemoryUsage().peak; if (peak_memory_usage >= 0) - std::cout << "\nPeak memory usage: " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << "."; + output_stream << "\nPeak memory usage: " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << "."; } void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message) @@ -125,7 +125,7 @@ void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message) const char * indicator = indicators[increment % 8]; - size_t terminal_width = getTerminalWidth(); + size_t terminal_width = getTerminalWidth(in_fd, err_fd); if (!written_progress_chars) { diff --git a/src/Common/ProgressIndication.h b/src/Common/ProgressIndication.h index a9965785889..ae39fb49bcc 100644 --- a/src/Common/ProgressIndication.h +++ b/src/Common/ProgressIndication.h @@ -32,6 +32,19 @@ using HostToTimesMap = std::unordered_map; class ProgressIndication { public: + + explicit ProgressIndication + ( + std::ostream & output_stream_ = std::cout, + int in_fd_ = STDIN_FILENO, + int err_fd_ = STDERR_FILENO + ) + : output_stream(output_stream_), + in_fd(in_fd_), + err_fd(err_fd_) + { + } + /// Write progress bar. 
void writeProgress(WriteBufferFromFileDescriptor & message); void clearProgressOutput(WriteBufferFromFileDescriptor & message); @@ -103,6 +116,10 @@ private: /// - hosts_data/cpu_usage_meter (guarded with profile_events_mutex) mutable std::mutex profile_events_mutex; mutable std::mutex progress_mutex; + + std::ostream & output_stream; + int in_fd; + int err_fd; }; } diff --git a/src/Common/ProxyConfiguration.h b/src/Common/ProxyConfiguration.h index 97577735bce..a9921f1474d 100644 --- a/src/Common/ProxyConfiguration.h +++ b/src/Common/ProxyConfiguration.h @@ -44,11 +44,18 @@ struct ProxyConfiguration } } + static bool useTunneling(Protocol request_protocol, Protocol proxy_protocol, bool disable_tunneling_for_https_requests_over_http_proxy) + { + bool is_https_request_over_http_proxy = request_protocol == Protocol::HTTPS && proxy_protocol == Protocol::HTTP; + return is_https_request_over_http_proxy && !disable_tunneling_for_https_requests_over_http_proxy; + } + std::string host = std::string{}; Protocol protocol = Protocol::HTTP; uint16_t port = 0; bool tunneling = false; Protocol original_request_protocol = Protocol::HTTP; + std::string no_proxy_hosts = std::string{}; bool isEmpty() const { return host.empty(); } }; diff --git a/src/Common/ProxyConfigurationResolver.h b/src/Common/ProxyConfigurationResolver.h index b82936502bb..1e9f4ad77f7 100644 --- a/src/Common/ProxyConfigurationResolver.h +++ b/src/Common/ProxyConfigurationResolver.h @@ -19,13 +19,6 @@ struct ProxyConfigurationResolver virtual void errorReport(const ProxyConfiguration & config) = 0; protected: - - static bool useTunneling(Protocol request_protocol, Protocol proxy_protocol, bool disable_tunneling_for_https_requests_over_http_proxy) - { - bool is_https_request_over_http_proxy = request_protocol == Protocol::HTTPS && proxy_protocol == Protocol::HTTP; - return is_https_request_over_http_proxy && !disable_tunneling_for_https_requests_over_http_proxy; - } - Protocol request_protocol; bool 
disable_tunneling_for_https_requests_over_http_proxy = false; }; diff --git a/src/Common/ProxyConfigurationResolverProvider.cpp b/src/Common/ProxyConfigurationResolverProvider.cpp index 4008ac2d8a5..b06073121e7 100644 --- a/src/Common/ProxyConfigurationResolverProvider.cpp +++ b/src/Common/ProxyConfigurationResolverProvider.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -17,6 +18,11 @@ namespace ErrorCodes namespace { + std::string getNoProxyHosts(const Poco::Util::AbstractConfiguration & configuration) + { + return configuration.getString("proxy.no_proxy", ""); + } + bool isTunnelingDisabledForHTTPSRequestsOverHTTPProxy( const Poco::Util::AbstractConfiguration & configuration) { @@ -49,6 +55,7 @@ namespace return std::make_shared( server_configuration, request_protocol, + buildPocoNonProxyHosts(getNoProxyHosts(configuration)), std::make_shared(), isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration)); } @@ -88,7 +95,11 @@ namespace return uris.empty() ? 
nullptr - : std::make_shared(uris, request_protocol, isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration)); + : std::make_shared( + uris, + request_protocol, + buildPocoNonProxyHosts(getNoProxyHosts(configuration)), + isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration)); } bool hasRemoteResolver(const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration) diff --git a/src/Common/ProxyListConfigurationResolver.cpp b/src/Common/ProxyListConfigurationResolver.cpp index c527c89ea6b..2d5b5e97364 100644 --- a/src/Common/ProxyListConfigurationResolver.cpp +++ b/src/Common/ProxyListConfigurationResolver.cpp @@ -1,7 +1,6 @@ #include #include -#include #include namespace DB @@ -9,8 +8,11 @@ namespace DB ProxyListConfigurationResolver::ProxyListConfigurationResolver( std::vector proxies_, - Protocol request_protocol_, bool disable_tunneling_for_https_requests_over_http_proxy_) - : ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_), proxies(std::move(proxies_)) + Protocol request_protocol_, + const std::string & no_proxy_hosts_, + bool disable_tunneling_for_https_requests_over_http_proxy_) + : ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_), + proxies(std::move(proxies_)), no_proxy_hosts(no_proxy_hosts_) { } @@ -26,12 +28,18 @@ ProxyConfiguration ProxyListConfigurationResolver::resolve() auto & proxy = proxies[index]; + bool use_tunneling_for_https_requests_over_http_proxy = ProxyConfiguration::useTunneling( + request_protocol, + ProxyConfiguration::protocolFromString(proxy.getScheme()), + disable_tunneling_for_https_requests_over_http_proxy); + return ProxyConfiguration { proxy.getHost(), ProxyConfiguration::protocolFromString(proxy.getScheme()), proxy.getPort(), - useTunneling(request_protocol, ProxyConfiguration::protocolFromString(proxy.getScheme()), disable_tunneling_for_https_requests_over_http_proxy), - request_protocol 
+ use_tunneling_for_https_requests_over_http_proxy, + request_protocol, + no_proxy_hosts }; } diff --git a/src/Common/ProxyListConfigurationResolver.h b/src/Common/ProxyListConfigurationResolver.h index 95e0073d779..a87826792d4 100644 --- a/src/Common/ProxyListConfigurationResolver.h +++ b/src/Common/ProxyListConfigurationResolver.h @@ -15,7 +15,11 @@ namespace DB class ProxyListConfigurationResolver : public ProxyConfigurationResolver { public: - ProxyListConfigurationResolver(std::vector proxies_, Protocol request_protocol_, bool disable_tunneling_for_https_requests_over_http_proxy_ = false); + ProxyListConfigurationResolver( + std::vector proxies_, + Protocol request_protocol_, + const std::string & no_proxy_hosts_, + bool disable_tunneling_for_https_requests_over_http_proxy_ = false); ProxyConfiguration resolve() override; @@ -23,6 +27,7 @@ public: private: std::vector proxies; + std::string no_proxy_hosts; /// Access counter to get proxy using round-robin strategy. std::atomic access_counter; diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index c3affbdd968..746010b5462 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -228,9 +228,9 @@ void Timer::cleanup() #endif template -QueryProfilerBase::QueryProfilerBase([[maybe_unused]] UInt64 thread_id, [[maybe_unused]] int clock_type, [[maybe_unused]] UInt32 period, [[maybe_unused]] int pause_signal_) - : log(getLogger("QueryProfiler")) - , pause_signal(pause_signal_) +QueryProfilerBase::QueryProfilerBase( + [[maybe_unused]] UInt64 thread_id, [[maybe_unused]] int clock_type, [[maybe_unused]] UInt32 period, [[maybe_unused]] int pause_signal_) + : log(getLogger("QueryProfiler")), pause_signal(pause_signal_) { #if defined(SANITIZER) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler disabled because they cannot work under sanitizers"); diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index 
176e7af4f0f..8fd9d381ece 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -42,11 +42,12 @@ std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const RemoteProxyConfigurationResolver::RemoteProxyConfigurationResolver( const RemoteServerConfiguration & remote_server_configuration_, Protocol request_protocol_, + const std::string & no_proxy_hosts_, std::shared_ptr fetcher_, bool disable_tunneling_for_https_requests_over_http_proxy_ ) : ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_), - remote_server_configuration(remote_server_configuration_), fetcher(fetcher_) + remote_server_configuration(remote_server_configuration_), no_proxy_hosts(no_proxy_hosts_), fetcher(fetcher_) { } @@ -84,7 +85,7 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve() auto proxy_protocol = ProxyConfiguration::protocolFromString(proxy_protocol_string); - bool use_tunneling_for_https_requests_over_http_proxy = useTunneling( + bool use_tunneling_for_https_requests_over_http_proxy = ProxyConfiguration::useTunneling( request_protocol, proxy_protocol, disable_tunneling_for_https_requests_over_http_proxy); @@ -94,6 +95,7 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve() cached_config.port = proxy_port; cached_config.tunneling = use_tunneling_for_https_requests_over_http_proxy; cached_config.original_request_protocol = request_protocol; + cached_config.no_proxy_hosts = no_proxy_hosts; cache_timestamp = std::chrono::system_clock::now(); cache_valid = true; diff --git a/src/Common/RemoteProxyConfigurationResolver.h b/src/Common/RemoteProxyConfigurationResolver.h index 4e61a185bb3..d41f6267b89 100644 --- a/src/Common/RemoteProxyConfigurationResolver.h +++ b/src/Common/RemoteProxyConfigurationResolver.h @@ -41,6 +41,7 @@ public: RemoteProxyConfigurationResolver( const RemoteServerConfiguration & remote_server_configuration_, Protocol 
request_protocol_, + const std::string & no_proxy_hosts_, std::shared_ptr fetcher_, bool disable_tunneling_for_https_requests_over_http_proxy_ = false); @@ -50,6 +51,7 @@ public: private: RemoteServerConfiguration remote_server_configuration; + std::string no_proxy_hosts; std::shared_ptr fetcher; std::mutex cache_mutex; diff --git a/src/Common/Scheduler/ISchedulerNode.h b/src/Common/Scheduler/ISchedulerNode.h index df8d86f379c..81b491b0eda 100644 --- a/src/Common/Scheduler/ISchedulerNode.h +++ b/src/Common/Scheduler/ISchedulerNode.h @@ -11,10 +11,10 @@ #include #include +#include #include #include -#include #include #include #include @@ -30,6 +30,8 @@ namespace ErrorCodes } class ISchedulerNode; +class EventQueue; +using EventId = UInt64; inline const Poco::Util::AbstractConfiguration & emptyConfig() { @@ -82,6 +84,115 @@ struct SchedulerNodeInfo } }; + +/* + * Node of hierarchy for scheduling requests for resource. Base class for all + * kinds of scheduling elements (queues, policies, constraints and schedulers). + * + * Root node is a scheduler, which has it's thread to dequeue requests, + * execute requests (see ResourceRequest) and process events in a thread-safe manner. + * Immediate children of the scheduler represent independent resources. + * Each resource has it's own hierarchy to achieve required scheduling policies. + * Non-leaf nodes do not hold requests, but keep scheduling state + * (e.g. consumption history, amount of in-flight requests, etc). + * Leafs of hierarchy are queues capable of holding pending requests. + * + * scheduler (SchedulerRoot) + * / \ + * constraint constraint (SemaphoreConstraint) + * | | + * policy policy (PriorityPolicy) + * / \ / \ + * q1 q2 q3 q4 (FifoQueue) + * + * Dequeueing request from an inner node will dequeue request from one of active leaf-queues in its subtree. 
+ * Node is considered to be active iff: + * - it has at least one pending request in one of leaves of it's subtree; + * - and enforced constraints, if any, are satisfied + * (e.g. amount of concurrent requests is not greater than some number). + * + * All methods must be called only from scheduler thread for thread-safety. + */ +class ISchedulerNode : public boost::intrusive::list_base_hook<>, private boost::noncopyable +{ +public: + explicit ISchedulerNode(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) + : event_queue(event_queue_) + , info(config, config_prefix) + {} + + virtual ~ISchedulerNode() = default; + + /// Checks if two nodes configuration is equal + virtual bool equals(ISchedulerNode * other) + { + return info.equals(other->info); + } + + /// Attach new child + virtual void attachChild(const std::shared_ptr & child) = 0; + + /// Detach and destroy child + virtual void removeChild(ISchedulerNode * child) = 0; + + /// Get attached child by name + virtual ISchedulerNode * getChild(const String & child_name) = 0; + + /// Activation of child due to the first pending request + /// Should be called on leaf node (i.e. queue) to propagate activation signal through chain to the root + virtual void activateChild(ISchedulerNode * child) = 0; + + /// Returns true iff node is active + virtual bool isActive() = 0; + + /// Returns number of active children + virtual size_t activeChildren() = 0; + + /// Returns the first request to be executed as the first component of resulting pair. + /// The second pair component is `true` iff node is still active after dequeueing. + virtual std::pair dequeueRequest() = 0; + + /// Returns full path string using names of every parent + String getPath() + { + String result; + ISchedulerNode * ptr = this; + while (ptr->parent) + { + result = "/" + ptr->basename + result; + ptr = ptr->parent; + } + return result.empty() ? 
"/" : result; + } + + /// Attach to a parent (used by attachChild) + virtual void setParent(ISchedulerNode * parent_) + { + parent = parent_; + } + +protected: + /// Notify parents about the first pending request or constraint becoming satisfied. + /// Postponed to be handled in scheduler thread, so it is intended to be called from outside. + void scheduleActivation(); + +public: + EventQueue * const event_queue; + String basename; + SchedulerNodeInfo info; + ISchedulerNode * parent = nullptr; + EventId activation_event_id = 0; // Valid for `ISchedulerNode` placed in EventQueue::activations + + /// Introspection + std::atomic dequeued_requests{0}; + std::atomic canceled_requests{0}; + std::atomic dequeued_cost{0}; + std::atomic canceled_cost{0}; + std::atomic busy_periods{0}; +}; + +using SchedulerNodePtr = std::shared_ptr; + /* * Simple waitable thread-safe FIFO task queue. * Intended to hold postponed events for later handling (usually by scheduler thread). @@ -89,57 +200,70 @@ struct SchedulerNodeInfo class EventQueue { public: - using Event = std::function; + using Task = std::function; + + static constexpr EventId not_postponed = 0; + using TimePoint = std::chrono::system_clock::time_point; using Duration = std::chrono::system_clock::duration; - static constexpr UInt64 not_postponed = 0; + + struct Event + { + const EventId event_id; + Task task; + + Event(EventId event_id_, Task && task_) + : event_id(event_id_) + , task(std::move(task_)) + {} + }; struct Postponed { TimePoint key; - UInt64 id; // for canceling - std::unique_ptr event; + EventId event_id; // for canceling + std::unique_ptr task; - Postponed(TimePoint key_, UInt64 id_, Event && event_) + Postponed(TimePoint key_, EventId event_id_, Task && task_) : key(key_) - , id(id_) - , event(std::make_unique(std::move(event_))) + , event_id(event_id_) + , task(std::make_unique(std::move(task_))) {} bool operator<(const Postponed & rhs) const { - return std::tie(key, id) > std::tie(rhs.key, rhs.id); // 
reversed for min-heap + return std::tie(key, event_id) > std::tie(rhs.key, rhs.event_id); // reversed for min-heap } }; /// Add an `event` to be processed after `until` time point. - /// Returns a unique id for canceling. - [[nodiscard]] UInt64 postpone(TimePoint until, Event && event) + /// Returns a unique event id for canceling. + [[nodiscard]] EventId postpone(TimePoint until, Task && task) { std::unique_lock lock{mutex}; if (postponed.empty() || until < postponed.front().key) pending.notify_one(); - auto id = ++last_id; - postponed.emplace_back(until, id, std::move(event)); + auto event_id = ++last_event_id; + postponed.emplace_back(until, event_id, std::move(task)); std::push_heap(postponed.begin(), postponed.end()); - return id; + return event_id; } /// Cancel a postponed event using its unique id. /// NOTE: Only postponed events can be canceled. /// NOTE: If you need to cancel enqueued event, consider doing your actions inside another enqueued /// NOTE: event instead. This ensures that all previous events are processed. - bool cancelPostponed(UInt64 postponed_id) + bool cancelPostponed(EventId postponed_event_id) { - if (postponed_id == not_postponed) + if (postponed_event_id == not_postponed) return false; std::unique_lock lock{mutex}; for (auto i = postponed.begin(), e = postponed.end(); i != e; ++i) { - if (i->id == postponed_id) + if (i->event_id == postponed_event_id) { postponed.erase(i); - // It is O(n), but we do not expect either big heaps or frequent cancels. So it is fine. + // It is O(n), but we do not expect neither big heaps nor frequent cancels. So it is fine. 
std::make_heap(postponed.begin(), postponed.end()); return true; } @@ -148,11 +272,23 @@ public: } /// Add an `event` for immediate processing - void enqueue(Event && event) + void enqueue(Task && task) { std::unique_lock lock{mutex}; - bool was_empty = queue.empty(); - queue.emplace_back(event); + bool was_empty = events.empty() && activations.empty(); + auto event_id = ++last_event_id; + events.emplace_back(event_id, std::move(task)); + if (was_empty) + pending.notify_one(); + } + + /// Add an activation `event` for immediate processing. Activations use a separate queue for performance reasons. + void enqueueActivation(ISchedulerNode * node) + { + std::unique_lock lock{mutex}; + bool was_empty = events.empty() && activations.empty(); + node->activation_event_id = ++last_event_id; + activations.push_back(*node); if (was_empty) pending.notify_one(); } @@ -163,7 +299,7 @@ public: bool forceProcess() { std::unique_lock lock{mutex}; - if (!queue.empty()) + if (!events.empty() || !activations.empty()) { processQueue(std::move(lock)); return true; @@ -181,7 +317,7 @@ public: bool tryProcess() { std::unique_lock lock{mutex}; - if (!queue.empty()) + if (!events.empty() || !activations.empty()) { processQueue(std::move(lock)); return true; @@ -205,7 +341,7 @@ public: std::unique_lock lock{mutex}; while (true) { - if (!queue.empty()) + if (!events.empty() || !activations.empty()) { processQueue(std::move(lock)); return; @@ -269,141 +405,69 @@ private: void processQueue(std::unique_lock && lock) { - Event event = std::move(queue.front()); - queue.pop_front(); + if (events.empty()) + { + processActivation(std::move(lock)); + return; + } + if (activations.empty()) + { + processEvent(std::move(lock)); + return; + } + if (activations.front().activation_event_id < events.front().event_id) + processActivation(std::move(lock)); + else + processEvent(std::move(lock)); + } + + void processActivation(std::unique_lock && lock) + { + ISchedulerNode * node = &activations.front(); + 
activations.pop_front(); + node->activation_event_id = 0; lock.unlock(); // do not hold queue mutex while processing events - event(); + node->parent->activateChild(node); + } + + void processEvent(std::unique_lock && lock) + { + Task task = std::move(events.front().task); + events.pop_front(); + lock.unlock(); // do not hold queue mutex while processing events + task(); } void processPostponed(std::unique_lock && lock) { - Event event = std::move(*postponed.front().event); + Task task = std::move(*postponed.front().task); std::pop_heap(postponed.begin(), postponed.end()); postponed.pop_back(); lock.unlock(); // do not hold queue mutex while processing events - event(); + task(); } std::mutex mutex; std::condition_variable pending; - std::deque queue; + + // `events` and `activations` logically represent one ordered queue. To preserve the common order we use `EventId` + // Activations are stored in a separate queue for performance reasons (mostly to avoid any allocations) + std::deque events; + boost::intrusive::list activations; + std::vector postponed; - UInt64 last_id = 0; + EventId last_event_id = 0; std::atomic manual_time{TimePoint()}; // for tests only }; -/* - * Node of hierarchy for scheduling requests for resource. Base class for all - * kinds of scheduling elements (queues, policies, constraints and schedulers). - * - * Root node is a scheduler, which has it's thread to dequeue requests, - * execute requests (see ResourceRequest) and process events in a thread-safe manner. - * Immediate children of the scheduler represent independent resources. - * Each resource has it's own hierarchy to achieve required scheduling policies. - * Non-leaf nodes do not hold requests, but keep scheduling state - * (e.g. consumption history, amount of in-flight requests, etc). - * Leafs of hierarchy are queues capable of holding pending requests. 
- * - * scheduler (SchedulerRoot) - * / \ - * constraint constraint (SemaphoreConstraint) - * | | - * policy policy (PriorityPolicy) - * / \ / \ - * q1 q2 q3 q4 (FifoQueue) - * - * Dequeueing request from an inner node will dequeue request from one of active leaf-queues in its subtree. - * Node is considered to be active iff: - * - it has at least one pending request in one of leaves of it's subtree; - * - and enforced constraints, if any, are satisfied - * (e.g. amount of concurrent requests is not greater than some number). - * - * All methods must be called only from scheduler thread for thread-safety. - */ -class ISchedulerNode : private boost::noncopyable +inline void ISchedulerNode::scheduleActivation() { -public: - explicit ISchedulerNode(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) - : event_queue(event_queue_) - , info(config, config_prefix) - {} - - virtual ~ISchedulerNode() = default; - - /// Checks if two nodes configuration is equal - virtual bool equals(ISchedulerNode * other) + if (likely(parent)) { - return info.equals(other->info); + // The same as `enqueue([this] { parent->activateChild(this); });` but faster + event_queue->enqueueActivation(this); } - - /// Attach new child - virtual void attachChild(const std::shared_ptr & child) = 0; - - /// Detach and destroy child - virtual void removeChild(ISchedulerNode * child) = 0; - - /// Get attached child by name - virtual ISchedulerNode * getChild(const String & child_name) = 0; - - /// Activation of child due to the first pending request - /// Should be called on leaf node (i.e. 
queue) to propagate activation signal through chain to the root - virtual void activateChild(ISchedulerNode * child) = 0; - - /// Returns true iff node is active - virtual bool isActive() = 0; - - /// Returns number of active children - virtual size_t activeChildren() = 0; - - /// Returns the first request to be executed as the first component of resulting pair. - /// The second pair component is `true` iff node is still active after dequeueing. - virtual std::pair dequeueRequest() = 0; - - /// Returns full path string using names of every parent - String getPath() - { - String result; - ISchedulerNode * ptr = this; - while (ptr->parent) - { - result = "/" + ptr->basename + result; - ptr = ptr->parent; - } - return result.empty() ? "/" : result; - } - - /// Attach to a parent (used by attachChild) - virtual void setParent(ISchedulerNode * parent_) - { - parent = parent_; - } - -protected: - /// Notify parents about the first pending request or constraint becoming satisfied. - /// Postponed to be handled in scheduler thread, so it is intended to be called from outside. 
- void scheduleActivation() - { - if (likely(parent)) - { - event_queue->enqueue([this] { parent->activateChild(this); }); - } - } - -public: - EventQueue * const event_queue; - String basename; - SchedulerNodeInfo info; - ISchedulerNode * parent = nullptr; - - /// Introspection - std::atomic dequeued_requests{0}; - std::atomic canceled_requests{0}; - std::atomic dequeued_cost{0}; - std::atomic canceled_cost{0}; - std::atomic busy_periods{0}; -}; - -using SchedulerNodePtr = std::shared_ptr; +} } diff --git a/src/Common/Scheduler/Nodes/tests/gtest_event_queue.cpp b/src/Common/Scheduler/Nodes/tests/gtest_event_queue.cpp new file mode 100644 index 00000000000..07798f78080 --- /dev/null +++ b/src/Common/Scheduler/Nodes/tests/gtest_event_queue.cpp @@ -0,0 +1,143 @@ +#include +#include + +#include + +using namespace DB; + +class FakeSchedulerNode : public ISchedulerNode +{ +public: + explicit FakeSchedulerNode(String & log_, EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {}) + : ISchedulerNode(event_queue_, config, config_prefix) + , log(log_) + {} + + void attachChild(const SchedulerNodePtr & child) override + { + log += " +" + child->basename; + } + + void removeChild(ISchedulerNode * child) override + { + log += " -" + child->basename; + } + + ISchedulerNode * getChild(const String & /* child_name */) override + { + return nullptr; + } + + void activateChild(ISchedulerNode * child) override + { + log += " A" + child->basename; + } + + bool isActive() override + { + return false; + } + + size_t activeChildren() override + { + return 0; + } + + std::pair dequeueRequest() override + { + log += " D"; + return {nullptr, false}; + } + +private: + String & log; +}; + +struct QueueTest { + String log; + EventQueue event_queue; + FakeSchedulerNode root_node; + + QueueTest() + : root_node(log, &event_queue) + {} + + SchedulerNodePtr makeNode(const String & name) + { + auto node = std::make_shared(log, 
&event_queue); + node->basename = name; + node->setParent(&root_node); + return std::static_pointer_cast(node); + } + + void process(EventQueue::TimePoint now, const String & expected_log, size_t limit = size_t(-1)) + { + event_queue.setManualTime(now); + for (;limit > 0; limit--) + { + if (!event_queue.tryProcess()) + break; + } + EXPECT_EQ(log, expected_log); + log.clear(); + } + + void activate(const SchedulerNodePtr & node) + { + event_queue.enqueueActivation(node.get()); + } + + void event(const String & text) + { + event_queue.enqueue([this, text] { log += " " + text; }); + } + + EventId postpone(EventQueue::TimePoint until, const String & text) + { + return event_queue.postpone(until, [this, text] { log += " " + text; }); + } + + void cancel(EventId event_id) + { + event_queue.cancelPostponed(event_id); + } +}; + +TEST(SchedulerEventQueue, Smoke) +{ + QueueTest t; + + using namespace std::chrono_literals; + + EventQueue::TimePoint start = std::chrono::system_clock::now(); + t.process(start, "", 0); + + // Activations + auto node1 = t.makeNode("1"); + auto node2 = t.makeNode("2"); + t.activate(node2); + t.activate(node1); + t.process(start + 42s, " A2 A1"); + + // Events + t.event("E1"); + t.event("E2"); + t.process(start + 100s, " E1 E2"); + + // Postponed events + t.postpone(start + 200s, "P200"); + auto p190 = t.postpone(start + 200s, "P190"); + t.postpone(start + 150s, "P150"); + t.postpone(start + 175s, "P175"); + t.process(start + 180s, " P150 P175"); + t.event("E3"); + t.cancel(p190); + t.process(start + 300s, " E3 P200"); + + // Ordering of events and activations + t.event("E1"); + t.activate(node1); + t.event("E2"); + t.activate(node2); + t.process(start + 300s, " E1 A1 E2 A2"); +} diff --git a/src/Common/Scheduler/Nodes/tests/gtest_throttler_constraint.cpp b/src/Common/Scheduler/Nodes/tests/gtest_throttler_constraint.cpp index 9703227ccfc..6cfccb252fa 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_throttler_constraint.cpp +++ 
b/src/Common/Scheduler/Nodes/tests/gtest_throttler_constraint.cpp @@ -5,8 +5,6 @@ #include #include -#include "Common/Scheduler/ISchedulerNode.h" -#include "Common/Scheduler/ResourceRequest.h" using namespace DB; diff --git a/src/Common/StatusFile.cpp b/src/Common/StatusFile.cpp index ba7595ae6d7..80464f38082 100644 --- a/src/Common/StatusFile.cpp +++ b/src/Common/StatusFile.cpp @@ -85,9 +85,18 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_) /// Write information about current server instance to the file. WriteBufferFromFileDescriptor out(fd, 1024); - fill(out); - /// Finalize here to avoid throwing exceptions in destructor. - out.finalize(); + try + { + fill(out); + /// Finalize here to avoid throwing exceptions in destructor. + out.finalize(); + } + catch (...) + { + /// Finalize in case of exception to avoid throwing exceptions in destructor + out.finalize(); + throw; + } } catch (...) { diff --git a/src/Common/StringUtils.h b/src/Common/StringUtils.h index fe5fc3c058f..e4c7ab3e80c 100644 --- a/src/Common/StringUtils.h +++ b/src/Common/StringUtils.h @@ -140,6 +140,18 @@ inline bool isPrintableASCII(char c) return uc >= 32 && uc <= 126; /// 127 is ASCII DEL. } +inline bool isCSIParameterByte(char c) +{ + uint8_t uc = c; + return uc >= 0x30 && uc <= 0x3F; /// ASCII 0–9:;<=>? 
+} + +inline bool isCSIIntermediateByte(char c) +{ + uint8_t uc = c; + return uc >= 0x20 && uc <= 0x2F; /// ASCII !"#$%&'()*+,-./ +} + inline bool isCSIFinalByte(char c) { uint8_t uc = c; diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 15803db4929..a9307c3be99 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -10,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 95906c63349..b87fcf419d3 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -1,9 +1,7 @@ #pragma once -#include #include #include -#include #include #include @@ -27,12 +25,13 @@ M(ZooKeeperLogElement) \ M(ProcessorProfileLogElement) \ M(TextLogElement) \ - M(S3QueueLogElement) \ + M(ObjectStorageQueueLogElement) \ M(FilesystemCacheLogElement) \ M(FilesystemReadPrefetchesLogElement) \ M(AsynchronousInsertLogElement) \ M(BackupLogElement) \ - M(BlobStorageLogElement) + M(BlobStorageLogElement) \ + M(ErrorLogElement) namespace Poco { diff --git a/src/Common/TerminalSize.cpp b/src/Common/TerminalSize.cpp index bc5b4474384..8139f4f7616 100644 --- a/src/Common/TerminalSize.cpp +++ b/src/Common/TerminalSize.cpp @@ -13,17 +13,17 @@ namespace DB::ErrorCodes extern const int SYSTEM_ERROR; } -uint16_t getTerminalWidth() +uint16_t getTerminalWidth(int in_fd, int err_fd) { struct winsize terminal_size {}; - if (isatty(STDIN_FILENO)) + if (isatty(in_fd)) { - if (ioctl(STDIN_FILENO, TIOCGWINSZ, &terminal_size)) + if (ioctl(in_fd, TIOCGWINSZ, &terminal_size)) throw DB::ErrnoException(DB::ErrorCodes::SYSTEM_ERROR, "Cannot obtain terminal window size (ioctl TIOCGWINSZ)"); } - else if (isatty(STDERR_FILENO)) + else if (isatty(err_fd)) { - if (ioctl(STDERR_FILENO, TIOCGWINSZ, &terminal_size)) + if (ioctl(err_fd, TIOCGWINSZ, &terminal_size)) throw 
DB::ErrnoException(DB::ErrorCodes::SYSTEM_ERROR, "Cannot obtain terminal window size (ioctl TIOCGWINSZ)"); } /// Default - 0. diff --git a/src/Common/TerminalSize.h b/src/Common/TerminalSize.h index b5fc6de7921..f1334f2bcb9 100644 --- a/src/Common/TerminalSize.h +++ b/src/Common/TerminalSize.h @@ -1,16 +1,16 @@ #pragma once #include +#include #include namespace po = boost::program_options; -uint16_t getTerminalWidth(); +uint16_t getTerminalWidth(int in_fd = STDIN_FILENO, int err_fd = STDERR_FILENO); /** Creates po::options_description with name and an appropriate size for option displaying * when program is called with option --help * */ po::options_description createOptionsDescription(const std::string &caption, unsigned short terminal_width); /// NOLINT - diff --git a/src/Common/Throttler.cpp b/src/Common/Throttler.cpp index 4c1320db27a..a581ff1766f 100644 --- a/src/Common/Throttler.cpp +++ b/src/Common/Throttler.cpp @@ -41,21 +41,9 @@ Throttler::Throttler(size_t max_speed_, size_t limit_, const char * limit_exceed UInt64 Throttler::add(size_t amount) { // Values obtained under lock to be checked after release - size_t count_value; - double tokens_value; - { - std::lock_guard lock(mutex); - auto now = clock_gettime_ns_adjusted(prev_ns); - if (max_speed) - { - double delta_seconds = prev_ns ? 
static_cast(now - prev_ns) / NS : 0; - tokens = std::min(tokens + max_speed * delta_seconds - amount, max_burst); - } - count += amount; - count_value = count; - tokens_value = tokens; - prev_ns = now; - } + size_t count_value = 0; + double tokens_value = 0.0; + addImpl(amount, count_value, tokens_value); if (limit && count_value > limit) throw Exception::createDeprecated(limit_exceeded_exception_message + std::string(" Maximum: ") + toString(limit), ErrorCodes::LIMIT_EXCEEDED); @@ -77,6 +65,21 @@ UInt64 Throttler::add(size_t amount) return static_cast(sleep_time_ns); } +void Throttler::addImpl(size_t amount, size_t & count_value, double & tokens_value) +{ + std::lock_guard lock(mutex); + auto now = clock_gettime_ns_adjusted(prev_ns); + if (max_speed) + { + double delta_seconds = prev_ns ? static_cast(now - prev_ns) / NS : 0; + tokens = std::min(tokens + max_speed * delta_seconds - amount, max_burst); + } + count += amount; + count_value = count; + tokens_value = tokens; + prev_ns = now; +} + void Throttler::reset() { std::lock_guard lock(mutex); @@ -98,4 +101,14 @@ bool Throttler::isThrottling() const return false; } +Int64 Throttler::getAvailable() +{ + // To update bucket state and receive current number of token in a thread-safe way + size_t count_value = 0; + double tokens_value = 0.0; + addImpl(0, count_value, tokens_value); + + return static_cast(tokens_value); +} + } diff --git a/src/Common/Throttler.h b/src/Common/Throttler.h index 7508065096b..32293d7400f 100644 --- a/src/Common/Throttler.h +++ b/src/Common/Throttler.h @@ -57,7 +57,13 @@ public: /// Is throttler already accumulated some sleep time and throttling. bool isThrottling() const; + Int64 getAvailable(); + UInt64 getMaxSpeed() const { return static_cast(max_speed); } + UInt64 getMaxBurst() const { return static_cast(max_burst); } + private: + void addImpl(size_t amount, size_t & count_value, double & tokens_value); + size_t count{0}; const size_t max_speed{0}; /// in tokens per second. 
const size_t max_burst{0}; /// in tokens. diff --git a/src/Common/TransactionID.h b/src/Common/TransactionID.h index 97d0072bc14..466f3f5343b 100644 --- a/src/Common/TransactionID.h +++ b/src/Common/TransactionID.h @@ -108,7 +108,7 @@ struct fmt::formatter } template - auto format(const DB::TransactionID & tid, FormatContext & context) + auto format(const DB::TransactionID & tid, FormatContext & context) const { return fmt::format_to(context.out(), "({}, {}, {})", tid.start_csn, tid.local_tid, tid.host_id); } diff --git a/src/Common/UTF8Helpers.cpp b/src/Common/UTF8Helpers.cpp index 8c8c8e8327b..dd24cb20933 100644 --- a/src/Common/UTF8Helpers.cpp +++ b/src/Common/UTF8Helpers.cpp @@ -103,7 +103,7 @@ template size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept { UTF8Decoder decoder; - int isEscapeSequence = false; + bool is_escape_sequence = false; size_t width = 0; size_t rollback = 0; for (size_t i = 0; i < size; ++i) @@ -116,6 +116,9 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l while (i + 15 < size) { + if (is_escape_sequence) + break; + __m128i bytes = _mm_loadu_si128(reinterpret_cast(&data[i])); const uint16_t non_regular_width_mask = _mm_movemask_epi8( @@ -132,25 +135,28 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l } else { - if (isEscapeSequence) - { - break; - } - else - { - i += 16; - width += 16; - } + i += 16; + width += 16; } } #endif while (i < size && isPrintableASCII(data[i])) { - if (!isEscapeSequence) + bool ignore_width = is_escape_sequence && (isCSIParameterByte(data[i]) || isCSIIntermediateByte(data[i])); + + if (ignore_width || (data[i] == '[' && is_escape_sequence)) + { + /// don't count the width + } + else if (is_escape_sequence && isCSIFinalByte(data[i])) + { + is_escape_sequence = false; + } + else + { ++width; - else if (isCSIFinalByte(data[i]) && data[i - 1] != '\x1b') - isEscapeSequence = false; /// end of CSI escape 
sequence reached + } ++i; } @@ -178,7 +184,7 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l // special treatment for '\t' and for ESC size_t next_width = width; if (decoder.codepoint == '\x1b') - isEscapeSequence = true; + is_escape_sequence = true; else if (decoder.codepoint == '\t') next_width += 8 - (prefix + width) % 8; else diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index ddd30c4eef2..2c6cbc4a5d5 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -559,6 +559,8 @@ public: /// Useful to check owner of ephemeral node. virtual int64_t getSessionID() const = 0; + virtual String tryGetAvailabilityZone() { return ""; } + /// If the method will throw an exception, callbacks won't be called. /// /// After the method is executed successfully, you must wait for callbacks @@ -635,10 +637,6 @@ public: virtual const DB::KeeperFeatureFlags * getKeeperFeatureFlags() const { return nullptr; } - /// A ZooKeeper session can have an optional deadline set on it. - /// After it has been reached, the session needs to be finalized. 
- virtual bool hasReachedDeadline() const = 0; - /// Expire session and finish all pending requests virtual void finalize(const String & reason) = 0; }; @@ -647,7 +645,7 @@ public: template <> struct fmt::formatter : fmt::formatter { - constexpr auto format(Coordination::Error code, auto & ctx) + constexpr auto format(Coordination::Error code, auto & ctx) const { return formatter::format(Coordination::errorMessage(code), ctx); } diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index 2774055652c..2194ad015bf 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -39,7 +39,6 @@ public: ~TestKeeper() override; bool isExpired() const override { return expired; } - bool hasReachedDeadline() const override { return false; } Int8 getConnectedNodeIdx() const override { return 0; } String getConnectedHostPort() const override { return "TestKeeper:0000"; } int32_t getConnectionXid() const override { return 0; } diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 4ec44a39136..56db9adb787 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -16,10 +17,12 @@ #include #include #include +#include #include "Common/ZooKeeper/IKeeper.h" #include #include #include +#include #include #include @@ -55,70 +58,120 @@ static void check(Coordination::Error code, const std::string & path) throw KeeperException::fromPath(code, path); } +UInt64 getSecondsUntilReconnect(const ZooKeeperArgs & args) +{ + std::uniform_int_distribution fallback_session_lifetime_distribution + { + args.fallback_session_lifetime.min_sec, + args.fallback_session_lifetime.max_sec, + }; + UInt32 session_lifetime_seconds = fallback_session_lifetime_distribution(thread_local_rng); + return session_lifetime_seconds; +} -void ZooKeeper::init(ZooKeeperArgs args_) +void ZooKeeper::updateAvailabilityZones() +{ + 
ShuffleHosts shuffled_hosts = shuffleHosts(); + + for (const auto & node : shuffled_hosts) + { + try + { + ShuffleHosts single_node{node}; + auto tmp_impl = std::make_unique(single_node, args, zk_log); + auto idx = node.original_index; + availability_zones[idx] = tmp_impl->tryGetAvailabilityZone(); + LOG_TEST(log, "Got availability zone for {}: {}", args.hosts[idx], availability_zones[idx]); + } + catch (...) + { + DB::tryLogCurrentException(log, "Failed to get availability zone for " + node.host); + } + } + LOG_DEBUG(log, "Updated availability zones: [{}]", fmt::join(availability_zones, ", ")); +} + +void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr existing_impl) { args = std::move(args_); log = getLogger("ZooKeeper"); - if (args.implementation == "zookeeper") + if (existing_impl) + { + chassert(args.implementation == "zookeeper"); + impl = std::move(existing_impl); + LOG_INFO(log, "Switching to connection to a more optimal node {}", impl->getConnectedHostPort()); + } + else if (args.implementation == "zookeeper") { if (args.hosts.empty()) throw KeeperException::fromMessage(Coordination::Error::ZBADARGUMENTS, "No hosts passed to ZooKeeper constructor."); - Coordination::ZooKeeper::Nodes nodes; - nodes.reserve(args.hosts.size()); + chassert(args.availability_zones.size() == args.hosts.size()); + if (availability_zones.empty()) + { + /// availability_zones is empty on server startup or after config reloading + /// We will keep the az info when starting new sessions + availability_zones = args.availability_zones; + LOG_TEST(log, "Availability zones from config: [{}], client: {}", fmt::join(availability_zones, ", "), args.client_availability_zone); + if (args.availability_zone_autodetect) + updateAvailabilityZones(); + } + chassert(availability_zones.size() == args.hosts.size()); /// Shuffle the hosts to distribute the load among ZooKeeper nodes. 
- std::vector shuffled_hosts = shuffleHosts(); + ShuffleHosts shuffled_hosts = shuffleHosts(); - bool dns_error = false; - for (auto & host : shuffled_hosts) - { - auto & host_string = host.host; - try - { - const bool secure = startsWith(host_string, "secure://"); - - if (secure) - host_string.erase(0, strlen("secure://")); - - /// We want to resolve all hosts without DNS cache for keeper connection. - Coordination::DNSResolver::instance().removeHostFromCache(host_string); - - const Poco::Net::SocketAddress host_socket_addr{host_string}; - LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, host_socket_addr.toString()); - nodes.emplace_back(Coordination::ZooKeeper::Node{host_socket_addr, host.original_index, secure}); - } - catch (const Poco::Net::HostNotFoundException & e) - { - /// Most likely it's misconfiguration and wrong hostname was specified - LOG_ERROR(log, "Cannot use ZooKeeper host {}, reason: {}", host_string, e.displayText()); - } - catch (const Poco::Net::DNSException & e) - { - /// Most likely DNS is not available now - dns_error = true; - LOG_ERROR(log, "Cannot use ZooKeeper host {} due to DNS error: {}", host_string, e.displayText()); - } - } - - if (nodes.empty()) - { - /// For DNS errors we throw exception with ZCONNECTIONLOSS code, so it will be considered as hardware error, not user error - if (dns_error) - throw KeeperException::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Cannot resolve any of provided ZooKeeper hosts due to DNS error"); - else - throw KeeperException::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Cannot use any of provided ZooKeeper nodes"); - } - - impl = std::make_unique(nodes, args, zk_log); + impl = std::make_unique(shuffled_hosts, args, zk_log); + Int8 node_idx = impl->getConnectedNodeIdx(); if (args.chroot.empty()) LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(args.hosts, ",")); else LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(args.hosts, ","), args.chroot); + + + /// If the 
balancing strategy has an optimal node then it will be the first in the list + bool connected_to_suboptimal_node = node_idx != shuffled_hosts[0].original_index; + bool respect_az = args.prefer_local_availability_zone && !args.client_availability_zone.empty(); + bool may_benefit_from_reconnecting = respect_az || args.get_priority_load_balancing.hasOptimalNode(); + if (connected_to_suboptimal_node && may_benefit_from_reconnecting) + { + auto reconnect_timeout_sec = getSecondsUntilReconnect(args); + LOG_DEBUG(log, "Connected to a suboptimal ZooKeeper host ({}, index {})." + " To preserve balance in ZooKeeper usage, this ZooKeeper session will expire in {} seconds", + impl->getConnectedHostPort(), node_idx, reconnect_timeout_sec); + + auto reconnect_task_holder = DB::Context::getGlobalContextInstance()->getSchedulePool().createTask("ZKReconnect", [this, optimal_host = shuffled_hosts[0]]() + { + try + { + LOG_DEBUG(log, "Trying to connect to a more optimal node {}", optimal_host.host); + ShuffleHosts node{optimal_host}; + std::unique_ptr new_impl = std::make_unique(node, args, zk_log); + Int8 new_node_idx = new_impl->getConnectedNodeIdx(); + + /// Maybe the node was unavailable when getting AZs first time, update just in case + if (args.availability_zone_autodetect && availability_zones[new_node_idx].empty()) + { + availability_zones[new_node_idx] = new_impl->tryGetAvailabilityZone(); + LOG_DEBUG(log, "Got availability zone for {}: {}", optimal_host.host, availability_zones[new_node_idx]); + } + + optimal_impl = std::move(new_impl); + impl->finalize("Connected to a more optimal node"); + } + catch (...) 
+ { + LOG_WARNING(log, "Failed to connect to a more optimal ZooKeeper, will try again later: {}", DB::getCurrentExceptionMessage(/*with_stacktrace*/ false)); + (*reconnect_task)->scheduleAfter(getSecondsUntilReconnect(args) * 1000); + } + }); + reconnect_task = std::make_unique(std::move(reconnect_task_holder)); + (*reconnect_task)->activate(); + (*reconnect_task)->scheduleAfter(reconnect_timeout_sec * 1000); + } } else if (args.implementation == "testkeeper") { @@ -152,29 +205,53 @@ void ZooKeeper::init(ZooKeeperArgs args_) } } +ZooKeeper::~ZooKeeper() +{ + if (reconnect_task) + (*reconnect_task)->deactivate(); +} ZooKeeper::ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr zk_log_) : zk_log(std::move(zk_log_)) { - init(args_); + init(args_, /*existing_impl*/ {}); +} + + +ZooKeeper::ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr zk_log_, Strings availability_zones_, std::unique_ptr existing_impl) + : availability_zones(std::move(availability_zones_)), zk_log(std::move(zk_log_)) +{ + if (availability_zones.size() != args_.hosts.size()) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Argument sizes mismatch: availability_zones count {} and hosts count {}", + availability_zones.size(), args_.hosts.size()); + init(args_, std::move(existing_impl)); } ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr zk_log_) : zk_log(std::move(zk_log_)) { - init(ZooKeeperArgs(config, config_name)); + init(ZooKeeperArgs(config, config_name), /*existing_impl*/ {}); } -std::vector ZooKeeper::shuffleHosts() const +ShuffleHosts ZooKeeper::shuffleHosts() const { - std::function get_priority = args.get_priority_load_balancing.getPriorityFunc(args.get_priority_load_balancing.load_balancing, 0, args.hosts.size()); - std::vector shuffle_hosts; + std::function get_priority = args.get_priority_load_balancing.getPriorityFunc( + args.get_priority_load_balancing.load_balancing, /* offset for first_or_random */ 0, 
args.hosts.size()); + ShuffleHosts shuffle_hosts; for (size_t i = 0; i < args.hosts.size(); ++i) { ShuffleHost shuffle_host; shuffle_host.host = args.hosts[i]; shuffle_host.original_index = static_cast(i); + + shuffle_host.secure = startsWith(shuffle_host.host, "secure://"); + if (shuffle_host.secure) + shuffle_host.host.erase(0, strlen("secure://")); + + if (!args.client_availability_zone.empty() && !availability_zones[i].empty()) + shuffle_host.az_info = availability_zones[i] == args.client_availability_zone ? ShuffleHost::SAME : ShuffleHost::OTHER; + if (get_priority) shuffle_host.priority = get_priority(i); shuffle_host.randomize(); @@ -1023,7 +1100,10 @@ ZooKeeperPtr ZooKeeper::create(const Poco::Util::AbstractConfiguration & config, ZooKeeperPtr ZooKeeper::startNewSession() const { - auto res = std::shared_ptr(new ZooKeeper(args, zk_log)); + if (reconnect_task) + (*reconnect_task)->deactivate(); + + auto res = std::shared_ptr(new ZooKeeper(args, zk_log, availability_zones, std::move(optimal_impl))); res->initSession(); return res; } @@ -1456,6 +1536,16 @@ int32_t ZooKeeper::getConnectionXid() const return impl->getConnectionXid(); } +String ZooKeeper::getConnectedHostAvailabilityZone() const +{ + if (args.implementation != "zookeeper" || !impl) + return ""; + Int8 idx = impl->getConnectedNodeIdx(); + if (idx < 0) + return ""; /// session expired + return availability_zones.at(idx); +} + size_t getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses) { if (responses.empty()) diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 08ff60a80cf..4ae2cfa6096 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -32,6 +32,7 @@ namespace DB { class ZooKeeperLog; class ZooKeeperWithFaultInjection; +class BackgroundSchedulePoolTaskHolder; namespace ErrorCodes { @@ -48,11 +49,23 @@ constexpr size_t MULTI_BATCH_SIZE = 100; struct ShuffleHost { + enum 
AvailabilityZoneInfo + { + SAME = 0, + UNKNOWN = 1, + OTHER = 2, + }; + String host; + bool secure = false; UInt8 original_index = 0; + AvailabilityZoneInfo az_info = UNKNOWN; Priority priority; UInt64 random = 0; + /// We should resolve it each time without caching + mutable std::optional address; + void randomize() { random = thread_local_rng(); @@ -60,11 +73,13 @@ struct ShuffleHost static bool compare(const ShuffleHost & lhs, const ShuffleHost & rhs) { - return std::forward_as_tuple(lhs.priority, lhs.random) - < std::forward_as_tuple(rhs.priority, rhs.random); + return std::forward_as_tuple(lhs.az_info, lhs.priority, lhs.random) + < std::forward_as_tuple(rhs.az_info, rhs.priority, rhs.random); } }; +using ShuffleHosts = std::vector; + struct RemoveException { explicit RemoveException(std::string_view path_ = "", bool remove_subtree_ = true) @@ -197,6 +212,9 @@ class ZooKeeper explicit ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr zk_log_ = nullptr); + /// Allows to keep info about availability zones when starting a new session + ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr zk_log_, Strings availability_zones_, std::unique_ptr existing_impl); + /** Config of the form: @@ -228,7 +246,9 @@ public: using Ptr = std::shared_ptr; using ErrorsList = std::initializer_list; - std::vector shuffleHosts() const; + ~ZooKeeper(); + + ShuffleHosts shuffleHosts() const; static Ptr create(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, @@ -596,8 +616,6 @@ public: UInt32 getSessionUptime() const { return static_cast(session_uptime.elapsedSeconds()); } - bool hasReachedDeadline() const { return impl->hasReachedDeadline(); } - uint64_t getSessionTimeoutMS() const { return args.session_timeout_ms; } void setServerCompletelyStarted(); @@ -606,6 +624,8 @@ public: String getConnectedHostPort() const; int32_t getConnectionXid() const; + String getConnectedHostAvailabilityZone() const; + const DB::KeeperFeatureFlags * 
getKeeperFeatureFlags() const { return impl->getKeeperFeatureFlags(); } /// Checks that our session was not killed, and allows to avoid applying a request from an old lost session. @@ -625,7 +645,8 @@ public: void addCheckSessionOp(Coordination::Requests & requests) const; private: - void init(ZooKeeperArgs args_); + void init(ZooKeeperArgs args_, std::unique_ptr existing_impl); + void updateAvailabilityZones(); /// The following methods don't any throw exceptions but return error codes. Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created); @@ -690,15 +711,20 @@ private: } std::unique_ptr impl; + mutable std::unique_ptr optimal_impl; ZooKeeperArgs args; + Strings availability_zones; + LoggerPtr log = nullptr; std::shared_ptr zk_log; AtomicStopwatch session_uptime; int32_t session_node_version; + + std::unique_ptr reconnect_task; }; diff --git a/src/Common/ZooKeeper/ZooKeeperArgs.cpp b/src/Common/ZooKeeper/ZooKeeperArgs.cpp index a581b6a7f38..18dff779a70 100644 --- a/src/Common/ZooKeeper/ZooKeeperArgs.cpp +++ b/src/Common/ZooKeeper/ZooKeeperArgs.cpp @@ -5,6 +5,9 @@ #include #include #include +#include +#include +#include #include namespace DB @@ -53,6 +56,7 @@ ZooKeeperArgs::ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, c ZooKeeperArgs::ZooKeeperArgs(const String & hosts_string) { splitInto<','>(hosts, hosts_string); + availability_zones.resize(hosts.size()); } void ZooKeeperArgs::initFromKeeperServerSection(const Poco::Util::AbstractConfiguration & config) @@ -103,8 +107,11 @@ void ZooKeeperArgs::initFromKeeperServerSection(const Poco::Util::AbstractConfig for (const auto & key : keys) { if (startsWith(key, "server")) + { hosts.push_back( (secure ? "secure://" : "") + config.getString(raft_configuration_key + "." + key + ".hostname") + ":" + tcp_port); + availability_zones.push_back(config.getString(raft_configuration_key + "." 
+ key + ".availability_zone", "")); + } } static constexpr std::array load_balancing_keys @@ -123,11 +130,15 @@ void ZooKeeperArgs::initFromKeeperServerSection(const Poco::Util::AbstractConfig auto load_balancing = magic_enum::enum_cast(Poco::toUpper(load_balancing_str)); if (!load_balancing) throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str); - get_priority_load_balancing.load_balancing = *load_balancing; + get_priority_load_balancing = DB::GetPriorityForLoadBalancing(*load_balancing, thread_local_rng() % hosts.size()); break; } } + availability_zone_autodetect = config.getBool(std::string{config_name} + ".availability_zone_autodetect", false); + prefer_local_availability_zone = config.getBool(std::string{config_name} + ".prefer_local_availability_zone", false); + if (prefer_local_availability_zone) + client_availability_zone = DB::PlacementInfo::PlacementInfo::instance().getAvailabilityZone(); } void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) @@ -137,6 +148,8 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_name, keys); + std::optional load_balancing; + for (const auto & key : keys) { if (key.starts_with("node")) @@ -144,6 +157,7 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio hosts.push_back( (config.getBool(config_name + "." + key + ".secure", false) ? "secure://" : "") + config.getString(config_name + "." + key + ".host") + ":" + config.getString(config_name + "." + key + ".port", "2181")); + availability_zones.push_back(config.getString(config_name + "." + key + ".availability_zone", "")); } else if (key == "session_timeout_ms") { @@ -199,6 +213,10 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio { sessions_path = config.getString(config_name + "." 
+ key); } + else if (key == "prefer_local_availability_zone") + { + prefer_local_availability_zone = config.getBool(config_name + "." + key); + } else if (key == "implementation") { implementation = config.getString(config_name + "." + key); @@ -207,10 +225,9 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio { String load_balancing_str = config.getString(config_name + "." + key); /// Use magic_enum to avoid dependency from dbms (`SettingFieldLoadBalancingTraits::fromString(...)`) - auto load_balancing = magic_enum::enum_cast(Poco::toUpper(load_balancing_str)); + load_balancing = magic_enum::enum_cast(Poco::toUpper(load_balancing_str)); if (!load_balancing) throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str); - get_priority_load_balancing.load_balancing = *load_balancing; } else if (key == "fallback_session_lifetime") { @@ -224,9 +241,19 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio { use_compression = config.getBool(config_name + "." + key); } + else if (key == "availability_zone_autodetect") + { + availability_zone_autodetect = config.getBool(config_name + "." 
+ key); + } else throw KeeperException(Coordination::Error::ZBADARGUMENTS, "Unknown key {} in config file", key); } + + if (load_balancing) + get_priority_load_balancing = DB::GetPriorityForLoadBalancing(*load_balancing, thread_local_rng() % hosts.size()); + + if (prefer_local_availability_zone) + client_availability_zone = DB::PlacementInfo::PlacementInfo::instance().getAvailabilityZone(); } } diff --git a/src/Common/ZooKeeper/ZooKeeperArgs.h b/src/Common/ZooKeeper/ZooKeeperArgs.h index 27ba173c0c3..945b77bf9c1 100644 --- a/src/Common/ZooKeeper/ZooKeeperArgs.h +++ b/src/Common/ZooKeeper/ZooKeeperArgs.h @@ -32,10 +32,12 @@ struct ZooKeeperArgs String zookeeper_name = "zookeeper"; String implementation = "zookeeper"; Strings hosts; + Strings availability_zones; String auth_scheme; String identity; String chroot; String sessions_path = "/clickhouse/sessions"; + String client_availability_zone; int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS; int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; @@ -47,6 +49,8 @@ struct ZooKeeperArgs UInt64 send_sleep_ms = 0; UInt64 recv_sleep_ms = 0; bool use_compression = false; + bool prefer_local_availability_zone = false; + bool availability_zone_autodetect = false; SessionLifetimeConfiguration fallback_session_lifetime = {}; DB::GetPriorityForLoadBalancing get_priority_load_balancing; diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 48bb510e589..dff14f74681 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -9,7 +9,6 @@ #include #include #include -#include namespace Coordination @@ -29,7 +28,7 @@ void ZooKeeperResponse::write(WriteBuffer & out) const Coordination::write(buf.str(), out); } -std::string ZooKeeperRequest::toString() const +std::string ZooKeeperRequest::toString(bool short_format) const { 
return fmt::format( "XID = {}\n" @@ -37,7 +36,7 @@ std::string ZooKeeperRequest::toString() const "Additional info:\n{}", xid, getOpNum(), - toStringImpl()); + toStringImpl(short_format)); } void ZooKeeperRequest::write(WriteBuffer & out) const @@ -60,7 +59,7 @@ void ZooKeeperSyncRequest::readImpl(ReadBuffer & in) Coordination::read(path, in); } -std::string ZooKeeperSyncRequest::toStringImpl() const +std::string ZooKeeperSyncRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -91,7 +90,7 @@ void ZooKeeperReconfigRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string ZooKeeperReconfigRequest::toStringImpl() const +std::string ZooKeeperReconfigRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "joining = {}\nleaving = {}\nnew_members = {}\nversion = {}", @@ -145,7 +144,7 @@ void ZooKeeperAuthRequest::readImpl(ReadBuffer & in) Coordination::read(data, in); } -std::string ZooKeeperAuthRequest::toStringImpl() const +std::string ZooKeeperAuthRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "type = {}\n" @@ -191,7 +190,7 @@ void ZooKeeperCreateRequest::readImpl(ReadBuffer & in) is_sequential = true; } -std::string ZooKeeperCreateRequest::toStringImpl() const +std::string ZooKeeperCreateRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -218,7 +217,7 @@ void ZooKeeperRemoveRequest::writeImpl(WriteBuffer & out) const Coordination::write(version, out); } -std::string ZooKeeperRemoveRequest::toStringImpl() const +std::string ZooKeeperRemoveRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -245,7 +244,7 @@ void ZooKeeperExistsRequest::readImpl(ReadBuffer & in) Coordination::read(has_watch, in); } -std::string ZooKeeperExistsRequest::toStringImpl() const +std::string ZooKeeperExistsRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ 
-272,7 +271,7 @@ void ZooKeeperGetRequest::readImpl(ReadBuffer & in) Coordination::read(has_watch, in); } -std::string ZooKeeperGetRequest::toStringImpl() const +std::string ZooKeeperGetRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -303,7 +302,7 @@ void ZooKeeperSetRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string ZooKeeperSetRequest::toStringImpl() const +std::string ZooKeeperSetRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -334,7 +333,7 @@ void ZooKeeperListRequest::readImpl(ReadBuffer & in) Coordination::read(has_watch, in); } -std::string ZooKeeperListRequest::toStringImpl() const +std::string ZooKeeperListRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -356,7 +355,7 @@ void ZooKeeperFilteredListRequest::readImpl(ReadBuffer & in) list_request_type = static_cast(read_request_type); } -std::string ZooKeeperFilteredListRequest::toStringImpl() const +std::string ZooKeeperFilteredListRequest::toStringImpl(bool /*short_format*/) const { return fmt::format( "path = {}\n" @@ -401,7 +400,7 @@ void ZooKeeperSetACLRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string ZooKeeperSetACLRequest::toStringImpl() const +std::string ZooKeeperSetACLRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}\nversion = {}", path, version); } @@ -426,7 +425,7 @@ void ZooKeeperGetACLRequest::writeImpl(WriteBuffer & out) const Coordination::write(path, out); } -std::string ZooKeeperGetACLRequest::toStringImpl() const +std::string ZooKeeperGetACLRequest::toStringImpl(bool /*short_format*/) const { return fmt::format("path = {}", path); } @@ -455,7 +454,7 @@ void ZooKeeperCheckRequest::readImpl(ReadBuffer & in) Coordination::read(version, in); } -std::string ZooKeeperCheckRequest::toStringImpl() const +std::string ZooKeeperCheckRequest::toStringImpl(bool 
/*short_format*/) const { return fmt::format("path = {}\nversion = {}", path, version); } @@ -600,8 +599,11 @@ void ZooKeeperMultiRequest::readImpl(ReadBuffer & in) } } -std::string ZooKeeperMultiRequest::toStringImpl() const +std::string ZooKeeperMultiRequest::toStringImpl(bool short_format) const { + if (short_format) + return fmt::format("Subrequests size = {}", requests.size()); + auto out = fmt::memory_buffer(); for (const auto & request : requests) { diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index 490c2dce4f8..fd6ec3cd375 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -63,12 +63,12 @@ struct ZooKeeperRequest : virtual Request /// Writes length, xid, op_num, then the rest. void write(WriteBuffer & out) const; - std::string toString() const; + std::string toString(bool short_format = false) const; virtual void writeImpl(WriteBuffer &) const = 0; virtual void readImpl(ReadBuffer &) = 0; - virtual std::string toStringImpl() const { return ""; } + virtual std::string toStringImpl(bool /*short_format*/) const { return ""; } static std::shared_ptr read(ReadBuffer & in); @@ -98,7 +98,7 @@ struct ZooKeeperSyncRequest final : ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Sync; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -123,7 +123,7 @@ struct ZooKeeperReconfigRequest final : ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Reconfig; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const 
override; bool isReadRequest() const override { return false; } @@ -176,7 +176,7 @@ struct ZooKeeperAuthRequest final : ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Auth; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -229,7 +229,7 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest OpNum getOpNum() const override { return not_exists ? OpNum::CreateIfNotExists : OpNum::Create; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -266,7 +266,7 @@ struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Remove; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -293,7 +293,7 @@ struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Exists; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -320,7 +320,7 @@ struct ZooKeeperGetRequest final : GetRequest, ZooKeeperRequest OpNum getOpNum() 
const override { return OpNum::Get; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -347,7 +347,7 @@ struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::Set; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -375,7 +375,7 @@ struct ZooKeeperListRequest : ListRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::List; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -395,7 +395,7 @@ struct ZooKeeperFilteredListRequest final : ZooKeeperListRequest OpNum getOpNum() const override { return OpNum::FilteredList; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; size_t bytesSize() const override { return ZooKeeperListRequest::bytesSize() + sizeof(list_request_type); } }; @@ -428,7 +428,7 @@ struct ZooKeeperCheckRequest : CheckRequest, ZooKeeperRequest OpNum getOpNum() const override { return not_exists ? 
OpNum::CheckNotExists : OpNum::Check; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -469,7 +469,7 @@ struct ZooKeeperSetACLRequest final : SetACLRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::SetACL; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return false; } @@ -490,7 +490,7 @@ struct ZooKeeperGetACLRequest final : GetACLRequest, ZooKeeperRequest OpNum getOpNum() const override { return OpNum::GetACL; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override { return true; } @@ -516,7 +516,7 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; - std::string toStringImpl() const override; + std::string toStringImpl(bool short_format) const override; ZooKeeperResponsePtr makeResponse() const override; bool isReadRequest() const override; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index ed7498b1ac9..8653af51308 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -23,6 +23,9 @@ #include #include +#include +#include + #include "Coordination/KeeperConstants.h" #include "config.h" @@ -338,7 +341,7 @@ 
ZooKeeper::~ZooKeeper() ZooKeeper::ZooKeeper( - const Nodes & nodes, + const zkutil::ShuffleHosts & nodes, const zkutil::ZooKeeperArgs & args_, std::shared_ptr zk_log_) : args(args_) @@ -426,7 +429,7 @@ ZooKeeper::ZooKeeper( void ZooKeeper::connect( - const Nodes & nodes, + const zkutil::ShuffleHosts & nodes, Poco::Timespan connection_timeout) { if (nodes.empty()) @@ -434,15 +437,51 @@ void ZooKeeper::connect( static constexpr size_t num_tries = 3; bool connected = false; + bool dns_error = false; + + size_t resolved_count = 0; + for (const auto & node : nodes) + { + try + { + const Poco::Net::SocketAddress host_socket_addr{node.host}; + LOG_TRACE(log, "Adding ZooKeeper host {} ({}), az: {}, priority: {}", node.host, host_socket_addr.toString(), node.az_info, node.priority); + node.address = host_socket_addr; + ++resolved_count; + } + catch (const Poco::Net::HostNotFoundException & e) + { + /// Most likely it's misconfiguration and wrong hostname was specified + LOG_ERROR(log, "Cannot use ZooKeeper host {}, reason: {}", node.host, e.displayText()); + } + catch (const Poco::Net::DNSException & e) + { + /// Most likely DNS is not available now + dns_error = true; + LOG_ERROR(log, "Cannot use ZooKeeper host {} due to DNS error: {}", node.host, e.displayText()); + } + } + + if (resolved_count == 0) + { + /// For DNS errors we throw exception with ZCONNECTIONLOSS code, so it will be considered as hardware error, not user error + if (dns_error) + throw zkutil::KeeperException::fromMessage( + Coordination::Error::ZCONNECTIONLOSS, "Cannot resolve any of provided ZooKeeper hosts due to DNS error"); + else + throw zkutil::KeeperException::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Cannot use any of provided ZooKeeper nodes"); + } WriteBufferFromOwnString fail_reasons; for (size_t try_no = 0; try_no < num_tries; ++try_no) { - for (size_t i = 0; i < nodes.size(); ++i) + for (const auto & node : nodes) { - const auto & node = nodes[i]; try { + if (!node.address) + 
continue; + /// Reset the state of previous attempt. if (node.secure) { @@ -458,7 +497,7 @@ void ZooKeeper::connect( socket = Poco::Net::StreamSocket(); } - socket.connect(node.address, connection_timeout); + socket.connect(*node.address, connection_timeout); socket_address = socket.peerAddress(); socket.setReceiveTimeout(args.operation_timeout_ms * 1000); @@ -498,27 +537,11 @@ void ZooKeeper::connect( } original_index = static_cast(node.original_index); - - if (i != 0) - { - std::uniform_int_distribution fallback_session_lifetime_distribution - { - args.fallback_session_lifetime.min_sec, - args.fallback_session_lifetime.max_sec, - }; - UInt32 session_lifetime_seconds = fallback_session_lifetime_distribution(thread_local_rng); - client_session_deadline = clock::now() + std::chrono::seconds(session_lifetime_seconds); - - LOG_DEBUG(log, "Connected to a suboptimal ZooKeeper host ({}, index {})." - " To preserve balance in ZooKeeper usage, this ZooKeeper session will expire in {} seconds", - node.address.toString(), i, session_lifetime_seconds); - } - break; } catch (...) 
{ - fail_reasons << "\n" << getCurrentExceptionMessage(false) << ", " << node.address.toString(); + fail_reasons << "\n" << getCurrentExceptionMessage(false) << ", " << node.address->toString(); } } @@ -532,6 +555,9 @@ void ZooKeeper::connect( bool first = true; for (const auto & node : nodes) { + if (!node.address) + continue; + if (first) first = false; else @@ -540,7 +566,7 @@ void ZooKeeper::connect( if (node.secure) message << "secure://"; - message << node.address.toString(); + message << node.address->toString(); } message << fail_reasons.str() << "\n"; @@ -1153,7 +1179,6 @@ void ZooKeeper::pushRequest(RequestInfo && info) { try { - checkSessionDeadline(); info.time = clock::now(); auto maybe_zk_log = std::atomic_load(&zk_log); if (maybe_zk_log) @@ -1201,44 +1226,44 @@ bool ZooKeeper::isFeatureEnabled(KeeperFeatureFlag feature_flag) const return keeper_feature_flags.isEnabled(feature_flag); } -void ZooKeeper::initFeatureFlags() +std::optional ZooKeeper::tryGetSystemZnode(const std::string & path, const std::string & description) { - const auto try_get = [&](const std::string & path, const std::string & description) -> std::optional + auto promise = std::make_shared>(); + auto future = promise->get_future(); + + auto callback = [promise](const Coordination::GetResponse & response) mutable { - auto promise = std::make_shared>(); - auto future = promise->get_future(); - - auto callback = [promise](const Coordination::GetResponse & response) mutable - { - promise->set_value(response); - }; - - get(path, std::move(callback), {}); - if (future.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) - throw Exception(Error::ZOPERATIONTIMEOUT, "Failed to get {}: timeout", description); - - auto response = future.get(); - - if (response.error == Coordination::Error::ZNONODE) - { - LOG_TRACE(log, "Failed to get {}", description); - return std::nullopt; - } - else if (response.error != Coordination::Error::ZOK) - { - throw 
Exception(response.error, "Failed to get {}", description); - } - - return std::move(response.data); + promise->set_value(response); }; - if (auto feature_flags = try_get(keeper_api_feature_flags_path, "feature flags"); feature_flags.has_value()) + get(path, std::move(callback), {}); + if (future.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready) + throw Exception(Error::ZOPERATIONTIMEOUT, "Failed to get {}: timeout", description); + + auto response = future.get(); + + if (response.error == Coordination::Error::ZNONODE) + { + LOG_TRACE(log, "Failed to get {}", description); + return std::nullopt; + } + else if (response.error != Coordination::Error::ZOK) + { + throw Exception(response.error, "Failed to get {}", description); + } + + return std::move(response.data); +} + +void ZooKeeper::initFeatureFlags() +{ + if (auto feature_flags = tryGetSystemZnode(keeper_api_feature_flags_path, "feature flags"); feature_flags.has_value()) { keeper_feature_flags.setFeatureFlags(std::move(*feature_flags)); return; } - auto keeper_api_version_string = try_get(keeper_api_version_path, "API version"); + auto keeper_api_version_string = tryGetSystemZnode(keeper_api_version_path, "API version"); DB::KeeperApiVersion keeper_api_version{DB::KeeperApiVersion::ZOOKEEPER_COMPATIBLE}; @@ -1256,6 +1281,17 @@ void ZooKeeper::initFeatureFlags() keeper_feature_flags.fromApiVersion(keeper_api_version); } +String ZooKeeper::tryGetAvailabilityZone() +{ + auto res = tryGetSystemZnode(keeper_availability_zone_path, "availability zone"); + if (res) + { + LOG_TRACE(log, "Availability zone for ZooKeeper at {}: {}", getConnectedHostPort(), *res); + return *res; + } + return ""; +} + void ZooKeeper::executeGenericRequest( const ZooKeeperRequestPtr & request, @@ -1587,17 +1623,6 @@ void ZooKeeper::setupFaultDistributions() inject_setup.test_and_set(); } -void ZooKeeper::checkSessionDeadline() const -{ - if (unlikely(hasReachedDeadline())) - throw 
Exception::fromMessage(Error::ZSESSIONEXPIRED, "Session expired (force expiry client-side)"); -} - -bool ZooKeeper::hasReachedDeadline() const -{ - return client_session_deadline.has_value() && clock::now() >= client_session_deadline.value(); -} - void ZooKeeper::maybeInjectSendFault() { if (unlikely(inject_setup.test() && send_inject_fault && send_inject_fault.value()(thread_local_rng))) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 8fdf0f97d9d..0c88c35b381 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -102,21 +103,12 @@ using namespace DB; class ZooKeeper final : public IKeeper { public: - struct Node - { - Poco::Net::SocketAddress address; - UInt8 original_index; - bool secure; - }; - - using Nodes = std::vector; - /** Connection to nodes is performed in order. If you want, shuffle them manually. * Operation timeout couldn't be greater than session timeout. * Operation timeout applies independently for network read, network write, waiting for events and synchronization. */ ZooKeeper( - const Nodes & nodes, + const zkutil::ShuffleHosts & nodes, const zkutil::ZooKeeperArgs & args_, std::shared_ptr zk_log_); @@ -130,9 +122,7 @@ public: String getConnectedHostPort() const override { return (original_index == -1) ? "" : args.hosts[original_index]; } int32_t getConnectionXid() const override { return next_xid.load(); } - /// A ZooKeeper session can have an optional deadline set on it. - /// After it has been reached, the session needs to be finalized. - bool hasReachedDeadline() const override; + String tryGetAvailabilityZone() override; /// Useful to check owner of ephemeral node. 
int64_t getSessionID() const override { return session_id; } @@ -271,7 +261,6 @@ private: clock::time_point time; }; - std::optional client_session_deadline {}; using RequestsQueue = ConcurrentBoundedQueue; RequestsQueue requests_queue{1024}; @@ -316,7 +305,7 @@ private: LoggerPtr log; void connect( - const Nodes & node, + const zkutil::ShuffleHosts & node, Poco::Timespan connection_timeout); void sendHandshake(); @@ -346,9 +335,10 @@ private: void logOperationIfNeeded(const ZooKeeperRequestPtr & request, const ZooKeeperResponsePtr & response = nullptr, bool finalize = false, UInt64 elapsed_microseconds = 0); + std::optional tryGetSystemZnode(const std::string & path, const std::string & description); + void initFeatureFlags(); - void checkSessionDeadline() const; CurrentMetrics::Increment active_session_metric_increment{CurrentMetrics::ZooKeeperSession}; std::shared_ptr zk_log; diff --git a/src/Common/ZooKeeper/examples/CMakeLists.txt b/src/Common/ZooKeeper/examples/CMakeLists.txt index 678b302a512..11669d765f7 100644 --- a/src/Common/ZooKeeper/examples/CMakeLists.txt +++ b/src/Common/ZooKeeper/examples/CMakeLists.txt @@ -1,15 +1,18 @@ clickhouse_add_executable(zkutil_test_commands zkutil_test_commands.cpp) target_link_libraries(zkutil_test_commands PRIVATE clickhouse_common_zookeeper_no_log + clickhouse_functions dbms) clickhouse_add_executable(zkutil_test_commands_new_lib zkutil_test_commands_new_lib.cpp) target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper_no_log clickhouse_compression + clickhouse_functions dbms) clickhouse_add_executable(zkutil_test_async zkutil_test_async.cpp) target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper_no_log + clickhouse_functions dbms) diff --git a/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp b/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp index 25d66b94b46..b3a1564b8ab 100644 --- 
a/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp +++ b/src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib.cpp @@ -25,24 +25,24 @@ try Poco::Logger::root().setChannel(channel); Poco::Logger::root().setLevel("trace"); - std::string hosts_arg = argv[1]; - std::vector hosts_strings; - splitInto<','>(hosts_strings, hosts_arg); - ZooKeeper::Nodes nodes; - nodes.reserve(hosts_strings.size()); - for (size_t i = 0; i < hosts_strings.size(); ++i) + zkutil::ZooKeeperArgs args{argv[1]}; + zkutil::ShuffleHosts nodes; + nodes.reserve(args.hosts.size()); + for (size_t i = 0; i < args.hosts.size(); ++i) { - std::string host_string = hosts_strings[i]; - bool secure = startsWith(host_string, "secure://"); + zkutil::ShuffleHost node; + std::string host_string = args.hosts[i]; + node.secure = startsWith(host_string, "secure://"); - if (secure) + if (node.secure) host_string.erase(0, strlen("secure://")); - nodes.emplace_back(ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, static_cast(i) , secure}); + node.host = host_string; + node.original_index = i; + + nodes.emplace_back(node); } - - zkutil::ZooKeeperArgs args; ZooKeeper zk(nodes, args, nullptr); Poco::Event event(true); diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 73e1396fb35..410576c2b4a 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -11,10 +11,10 @@ clickhouse_add_executable (small_table small_table.cpp) target_link_libraries (small_table PRIVATE clickhouse_common_io) clickhouse_add_executable (parallel_aggregation parallel_aggregation.cpp) -target_link_libraries (parallel_aggregation PRIVATE dbms) +target_link_libraries (parallel_aggregation PRIVATE dbms clickhouse_functions) clickhouse_add_executable (parallel_aggregation2 parallel_aggregation2.cpp) -target_link_libraries (parallel_aggregation2 PRIVATE dbms) +target_link_libraries (parallel_aggregation2 PRIVATE dbms clickhouse_functions) 
clickhouse_add_executable (int_hashes_perf int_hashes_perf.cpp) target_link_libraries (int_hashes_perf PRIVATE clickhouse_common_io) @@ -85,7 +85,7 @@ target_link_libraries (interval_tree PRIVATE dbms) if (ENABLE_SSL) clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp) - target_link_libraries (encrypt_decrypt PRIVATE dbms) + target_link_libraries (encrypt_decrypt PRIVATE dbms clickhouse_functions) endif() clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp) diff --git a/src/Common/formatReadable.h b/src/Common/formatReadable.h index a05a2a7f9e2..0d7a437219a 100644 --- a/src/Common/formatReadable.h +++ b/src/Common/formatReadable.h @@ -49,7 +49,7 @@ struct fmt::formatter } template - auto format(const ReadableSize & size, FormatContext & ctx) + auto format(const ReadableSize & size, FormatContext & ctx) const { return fmt::format_to(ctx.out(), "{}", formatReadableSizeWithBinarySuffix(size.value)); } diff --git a/src/Common/memory.h b/src/Common/memory.h index a828ba7a38e..caa0418fa56 100644 --- a/src/Common/memory.h +++ b/src/Common/memory.h @@ -5,6 +5,8 @@ #include #include +#include +#include #include "config.h" #if USE_JEMALLOC @@ -15,11 +17,12 @@ # include #endif -#if USE_GWP_ASAN -# include - -static gwp_asan::GuardedPoolAllocator GuardedAlloc; -#endif +namespace ProfileEvents +{ + extern const Event GWPAsanAllocateSuccess; + extern const Event GWPAsanAllocateFailed; + extern const Event GWPAsanFree; +} namespace Memory { @@ -34,17 +37,31 @@ requires DB::OptionalArgument inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... 
align) { #if USE_GWP_ASAN - if (unlikely(GuardedAlloc.shouldSample())) + if (unlikely(GWPAsan::GuardedAlloc.shouldSample())) { if constexpr (sizeof...(TAlign) == 1) { - if (void * ptr = GuardedAlloc.allocate(size, alignToSizeT(align...))) + if (void * ptr = GWPAsan::GuardedAlloc.allocate(size, alignToSizeT(align...))) + { + ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess); return ptr; + } + else + { + ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed); + } } else { - if (void * ptr = GuardedAlloc.allocate(size)) + if (void * ptr = GWPAsan::GuardedAlloc.allocate(size)) + { + ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess); return ptr; + } + else + { + ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed); + } } } @@ -66,10 +83,17 @@ inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align) inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept { #if USE_GWP_ASAN - if (unlikely(GuardedAlloc.shouldSample())) + if (unlikely(GWPAsan::GuardedAlloc.shouldSample())) { - if (void * ptr = GuardedAlloc.allocate(size)) + if (void * ptr = GWPAsan::GuardedAlloc.allocate(size)) + { + ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess); return ptr; + } + else + { + ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed); + } } #endif return malloc(size); @@ -78,10 +102,17 @@ inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept inline ALWAYS_INLINE void * newNoExept(std::size_t size, std::align_val_t align) noexcept { #if USE_GWP_ASAN - if (unlikely(GuardedAlloc.shouldSample())) + if (unlikely(GWPAsan::GuardedAlloc.shouldSample())) { - if (void * ptr = GuardedAlloc.allocate(size, alignToSizeT(align))) + if (void * ptr = GWPAsan::GuardedAlloc.allocate(size, alignToSizeT(align))) + { + ProfileEvents::increment(ProfileEvents::GWPAsanAllocateSuccess); return ptr; + } + else + { + ProfileEvents::increment(ProfileEvents::GWPAsanAllocateFailed); + } } #endif return 
aligned_alloc(static_cast(align), size); @@ -90,9 +121,10 @@ inline ALWAYS_INLINE void * newNoExept(std::size_t size, std::align_val_t align) inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept { #if USE_GWP_ASAN - if (unlikely(GuardedAlloc.pointerIsMine(ptr))) + if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(ptr))) { - GuardedAlloc.deallocate(ptr); + ProfileEvents::increment(ProfileEvents::GWPAsanFree); + GWPAsan::GuardedAlloc.deallocate(ptr); return; } #endif @@ -109,9 +141,10 @@ inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size, TAlign... al return; #if USE_GWP_ASAN - if (unlikely(GuardedAlloc.pointerIsMine(ptr))) + if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(ptr))) { - GuardedAlloc.deallocate(ptr); + ProfileEvents::increment(ProfileEvents::GWPAsanFree); + GWPAsan::GuardedAlloc.deallocate(ptr); return; } #endif @@ -129,9 +162,10 @@ requires DB::OptionalArgument inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]], TAlign... 
/* align */) noexcept { #if USE_GWP_ASAN - if (unlikely(GuardedAlloc.pointerIsMine(ptr))) + if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(ptr))) { - GuardedAlloc.deallocate(ptr); + ProfileEvents::increment(ProfileEvents::GWPAsanFree); + GWPAsan::GuardedAlloc.deallocate(ptr); return; } #endif @@ -183,10 +217,10 @@ inline ALWAYS_INLINE size_t untrackMemory(void * ptr [[maybe_unused]], Allocatio std::size_t actual_size = 0; #if USE_GWP_ASAN - if (unlikely(GuardedAlloc.pointerIsMine(ptr))) + if (unlikely(GWPAsan::GuardedAlloc.pointerIsMine(ptr))) { if (!size) - size = GuardedAlloc.getSize(ptr); + size = GWPAsan::GuardedAlloc.getSize(ptr); trace = CurrentMemoryTracker::free(size); return size; } diff --git a/src/Common/new_delete.cpp b/src/Common/new_delete.cpp index 9e93dca9787..e8151fbe201 100644 --- a/src/Common/new_delete.cpp +++ b/src/Common/new_delete.cpp @@ -1,5 +1,4 @@ #include -#include #include #include "config.h" #include @@ -42,27 +41,6 @@ static struct InitializeJemallocZoneAllocatorForOSX } initializeJemallocZoneAllocatorForOSX; #endif -#if USE_GWP_ASAN - -#include - -/// Both clickhouse_new_delete and clickhouse_common_io links gwp_asan, but It should only init once, otherwise it -/// will cause unexpected deadlock. -static struct InitGwpAsan -{ - InitGwpAsan() - { - gwp_asan::options::initOptions(); - gwp_asan::options::Options &opts = gwp_asan::options::getOptions(); - GuardedAlloc.init(opts); - - ///std::cerr << "GwpAsan is initialized, the options are { Enabled: " << opts.Enabled - /// << ", MaxSimultaneousAllocations: " << opts.MaxSimultaneousAllocations - /// << ", SampleRate: " << opts.SampleRate << " }\n"; - } -} init_gwp_asan; -#endif - /// Replace default new/delete with memory tracking versions. 
/// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new /// https://en.cppreference.com/w/cpp/memory/new/operator_delete diff --git a/src/Common/proxyConfigurationToPocoProxyConfig.cpp b/src/Common/proxyConfigurationToPocoProxyConfig.cpp new file mode 100644 index 00000000000..c06014ac2dc --- /dev/null +++ b/src/Common/proxyConfigurationToPocoProxyConfig.cpp @@ -0,0 +1,117 @@ +#include + + +#include +#include + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#pragma clang diagnostic ignored "-Wgnu-anonymous-struct" +#pragma clang diagnostic ignored "-Wnested-anon-types" +#pragma clang diagnostic ignored "-Wunused-parameter" +#pragma clang diagnostic ignored "-Wshadow-field-in-constructor" +#pragma clang diagnostic ignored "-Wdtor-name" +#include +#pragma clang diagnostic pop + +namespace DB +{ + +namespace +{ + +/* + * Copy `curl` behavior instead of `wget` as it seems to be more flexible. + * `curl` strips leading dot and accepts url gitlab.com as a match for no_proxy .gitlab.com, + * while `wget` does an exact match. + * */ +std::string buildPocoRegexpEntryWithoutLeadingDot(const std::string & host) +{ + std::string_view view_without_leading_dot = host; + if (host[0] == '.') + { + view_without_leading_dot = std::string_view {host.begin() + 1u, host.end()}; + } + + return RE2::QuoteMeta(view_without_leading_dot); +} + +} + +/* + * Even though there is not an RFC that defines NO_PROXY, it is usually a comma-separated list of domains. + * Different tools implement their own versions of `NO_PROXY` support. Some support CIDR blocks, some support wildcard etc. 
+ * Opting for a simple implementation that covers most use cases: + * * Support only single wildcard * (match anything) + * * Match subdomains + * * Strip leading dots + * * No regex + * * No CIDR blocks + * * No fancy stuff about loopback IPs + * https://about.gitlab.com/blog/2021/01/27/we-need-to-talk-no-proxy/ + * Open for discussions + * */ +std::string buildPocoNonProxyHosts(const std::string & no_proxy_hosts_string) +{ + if (no_proxy_hosts_string.empty()) + { + return ""; + } + + static constexpr auto OR_SEPARATOR = "|"; + static constexpr auto MATCH_ANYTHING = R"(.*)"; + static constexpr auto MATCH_SUBDOMAINS_REGEX = R"((?:.*\.)?)"; + + bool match_any_host = no_proxy_hosts_string.size() == 1 && no_proxy_hosts_string[0] == '*'; + + if (match_any_host) + { + return MATCH_ANYTHING; + } + + std::vector no_proxy_hosts; + splitInto<','>(no_proxy_hosts, no_proxy_hosts_string); + + bool first = true; + std::string result; + + for (auto & host : no_proxy_hosts) + { + trim(host); + + if (host.empty()) + { + continue; + } + + if (!first) + { + result.append(OR_SEPARATOR); + } + + auto escaped_host_without_leading_dot = buildPocoRegexpEntryWithoutLeadingDot(host); + + result.append(MATCH_SUBDOMAINS_REGEX); + result.append(escaped_host_without_leading_dot); + + first = false; + } + + return result; +} + +Poco::Net::HTTPClientSession::ProxyConfig proxyConfigurationToPocoProxyConfig(const DB::ProxyConfiguration & proxy_configuration) +{ + Poco::Net::HTTPClientSession::ProxyConfig poco_proxy_config; + + poco_proxy_config.host = proxy_configuration.host; + poco_proxy_config.port = proxy_configuration.port; + poco_proxy_config.protocol = DB::ProxyConfiguration::protocolToString(proxy_configuration.protocol); + poco_proxy_config.tunnel = proxy_configuration.tunneling; + poco_proxy_config.originalRequestProtocol = DB::ProxyConfiguration::protocolToString(proxy_configuration.original_request_protocol); + poco_proxy_config.nonProxyHosts = proxy_configuration.no_proxy_hosts; + + 
return poco_proxy_config; +} + +} diff --git a/src/Common/proxyConfigurationToPocoProxyConfig.h b/src/Common/proxyConfigurationToPocoProxyConfig.h new file mode 100644 index 00000000000..c118bd059f9 --- /dev/null +++ b/src/Common/proxyConfigurationToPocoProxyConfig.h @@ -0,0 +1,13 @@ +#pragma once + +#include +#include + +namespace DB +{ + +Poco::Net::HTTPClientSession::ProxyConfig proxyConfigurationToPocoProxyConfig(const DB::ProxyConfiguration & proxy_configuration); + +std::string buildPocoNonProxyHosts(const std::string & no_proxy_hosts_string); + +} diff --git a/src/Common/tests/gtest_helper_functions.h b/src/Common/tests/gtest_helper_functions.h index 54c9ae9170d..90c5d4d2088 100644 --- a/src/Common/tests/gtest_helper_functions.h +++ b/src/Common/tests/gtest_helper_functions.h @@ -76,22 +76,28 @@ inline std::string xmlNodeAsString(Poco::XML::Node *pNode) struct EnvironmentProxySetter { - EnvironmentProxySetter(const Poco::URI & http_proxy, const Poco::URI & https_proxy) - { - if (!http_proxy.empty()) - { - setenv("http_proxy", http_proxy.toString().c_str(), 1); // NOLINT(concurrency-mt-unsafe) - } + static constexpr auto * NO_PROXY = "*"; + static constexpr auto * HTTP_PROXY = "http://proxy_server:3128"; + static constexpr auto * HTTPS_PROXY = "https://proxy_server:3128"; - if (!https_proxy.empty()) - { - setenv("https_proxy", https_proxy.toString().c_str(), 1); // NOLINT(concurrency-mt-unsafe) - } + EnvironmentProxySetter() + { + setenv("http_proxy", HTTP_PROXY, 1); // NOLINT(concurrency-mt-unsafe) + + setenv("https_proxy", HTTPS_PROXY, 1); // NOLINT(concurrency-mt-unsafe) + + // Some other tests rely on HTTP clients (e.g, gtest_aws_s3_client), which depend on proxy configuration + // since in https://github.com/ClickHouse/ClickHouse/pull/63314 the environment proxy resolver reads only once + // from the environment, the proxy configuration will always be there. + // The problem is that the proxy server does not exist, causing the test to fail. 
+ // To work around this issue, `no_proxy` is set to bypass all domains. + setenv("no_proxy", NO_PROXY, 1); // NOLINT(concurrency-mt-unsafe) } ~EnvironmentProxySetter() { unsetenv("http_proxy"); // NOLINT(concurrency-mt-unsafe) unsetenv("https_proxy"); // NOLINT(concurrency-mt-unsafe) + unsetenv("no_proxy"); // NOLINT(concurrency-mt-unsafe) } }; diff --git a/src/Common/tests/gtest_named_collections.cpp b/src/Common/tests/gtest_named_collections.cpp index 8a8a364961b..8d9aa2bc213 100644 --- a/src/Common/tests/gtest_named_collections.cpp +++ b/src/Common/tests/gtest_named_collections.cpp @@ -1,12 +1,40 @@ #include #include -#include #include #include #include using namespace DB; +/// A class which allows to test private methods of NamedCollectionFactory. +class NamedCollectionFactoryFriend : public NamedCollectionFactory +{ +public: + static NamedCollectionFactoryFriend & instance() + { + static NamedCollectionFactoryFriend instance; + return instance; + } + + void loadFromConfig(const Poco::Util::AbstractConfiguration & config) + { + std::lock_guard lock(mutex); + NamedCollectionFactory::loadFromConfig(config, lock); + } + + void add(const std::string & collection_name, MutableNamedCollectionPtr collection) + { + std::lock_guard lock(mutex); + NamedCollectionFactory::add(collection_name, collection, lock); + } + + void remove(const std::string & collection_name) + { + std::lock_guard lock(mutex); + NamedCollectionFactory::remove(collection_name, lock); + } +}; + TEST(NamedCollections, SimpleConfig) { std::string xml(R"CONFIG( @@ -29,13 +57,13 @@ TEST(NamedCollections, SimpleConfig) Poco::AutoPtr document = dom_parser.parseString(xml); Poco::AutoPtr config = new Poco::Util::XMLConfiguration(document); - NamedCollectionUtils::loadFromConfig(*config); + NamedCollectionFactoryFriend::instance().loadFromConfig(*config); - ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection1")); - ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection2")); - 
ASSERT_TRUE(NamedCollectionFactory::instance().tryGet("collection3") == nullptr); + ASSERT_TRUE(NamedCollectionFactoryFriend::instance().exists("collection1")); + ASSERT_TRUE(NamedCollectionFactoryFriend::instance().exists("collection2")); + ASSERT_TRUE(NamedCollectionFactoryFriend::instance().tryGet("collection3") == nullptr); - auto collections = NamedCollectionFactory::instance().getAll(); + auto collections = NamedCollectionFactoryFriend::instance().getAll(); ASSERT_EQ(collections.size(), 2); ASSERT_TRUE(collections.contains("collection1")); ASSERT_TRUE(collections.contains("collection2")); @@ -47,7 +75,7 @@ key3: 3.3 key4: -4 )CONFIG"); - auto collection1 = NamedCollectionFactory::instance().get("collection1"); + auto collection1 = NamedCollectionFactoryFriend::instance().get("collection1"); ASSERT_TRUE(collection1 != nullptr); ASSERT_TRUE(collection1->get("key1") == "value1"); @@ -61,7 +89,7 @@ key5: 5 key6: 6.6 )CONFIG"); - auto collection2 = NamedCollectionFactory::instance().get("collection2"); + auto collection2 = NamedCollectionFactoryFriend::instance().get("collection2"); ASSERT_TRUE(collection2 != nullptr); ASSERT_TRUE(collection2->get("key4") == "value4"); @@ -69,9 +97,9 @@ key6: 6.6 ASSERT_TRUE(collection2->get("key6") == 6.6); auto collection2_copy = collections["collection2"]->duplicate(); - NamedCollectionFactory::instance().add("collection2_copy", collection2_copy); - ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection2_copy")); - ASSERT_EQ(NamedCollectionFactory::instance().get("collection2_copy")->dumpStructure(), + NamedCollectionFactoryFriend::instance().add("collection2_copy", collection2_copy); + ASSERT_TRUE(NamedCollectionFactoryFriend::instance().exists("collection2_copy")); + ASSERT_EQ(NamedCollectionFactoryFriend::instance().get("collection2_copy")->dumpStructure(), R"CONFIG(key4: value4 key5: 5 key6: 6.6 @@ -88,8 +116,8 @@ key6: 6.6 collection2_copy->setOrUpdate("key4", "value45", {}); 
ASSERT_EQ(collection2_copy->getOrDefault("key4", "N"), "value45"); - NamedCollectionFactory::instance().remove("collection2_copy"); - ASSERT_FALSE(NamedCollectionFactory::instance().exists("collection2_copy")); + NamedCollectionFactoryFriend::instance().remove("collection2_copy"); + ASSERT_FALSE(NamedCollectionFactoryFriend::instance().exists("collection2_copy")); config.reset(); } @@ -119,11 +147,11 @@ TEST(NamedCollections, NestedConfig) Poco::AutoPtr document = dom_parser.parseString(xml); Poco::AutoPtr config = new Poco::Util::XMLConfiguration(document); - NamedCollectionUtils::loadFromConfig(*config); + NamedCollectionFactoryFriend::instance().loadFromConfig(*config); - ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection3")); + ASSERT_TRUE(NamedCollectionFactoryFriend::instance().exists("collection3")); - auto collection = NamedCollectionFactory::instance().get("collection3"); + auto collection = NamedCollectionFactoryFriend::instance().get("collection3"); ASSERT_TRUE(collection != nullptr); ASSERT_EQ(collection->dumpStructure(), @@ -171,8 +199,8 @@ TEST(NamedCollections, NestedConfigDuplicateKeys) Poco::AutoPtr document = dom_parser.parseString(xml); Poco::AutoPtr config = new Poco::Util::XMLConfiguration(document); - NamedCollectionUtils::loadFromConfig(*config); - auto collection = NamedCollectionFactory::instance().get("collection"); + NamedCollectionFactoryFriend::instance().loadFromConfig(*config); + auto collection = NamedCollectionFactoryFriend::instance().get("collection"); auto keys = collection->getKeys(); ASSERT_EQ(keys.size(), 6); diff --git a/src/Common/tests/gtest_poco_no_proxy_regex.cpp b/src/Common/tests/gtest_poco_no_proxy_regex.cpp new file mode 100644 index 00000000000..c3c1b512c08 --- /dev/null +++ b/src/Common/tests/gtest_poco_no_proxy_regex.cpp @@ -0,0 +1,24 @@ +#include + +#include + +TEST(ProxyConfigurationToPocoProxyConfiguration, TestNoProxyHostRegexBuild) +{ + ASSERT_EQ( + 
DB::buildPocoNonProxyHosts("localhost,127.0.0.1,some_other_domain:8080,sub-domain.domain.com"), + R"((?:.*\.)?localhost|(?:.*\.)?127\.0\.0\.1|(?:.*\.)?some_other_domain\:8080|(?:.*\.)?sub\-domain\.domain\.com)"); +} + +TEST(ProxyConfigurationToPocoProxyConfiguration, TestNoProxyHostRegexBuildMatchAnything) +{ + ASSERT_EQ( + DB::buildPocoNonProxyHosts("*"), + ".*"); +} + +TEST(ProxyConfigurationToPocoProxyConfiguration, TestNoProxyHostRegexBuildEmpty) +{ + ASSERT_EQ( + DB::buildPocoNonProxyHosts(""), + ""); +} diff --git a/src/Common/tests/gtest_proxy_configuration_resolver_provider.cpp b/src/Common/tests/gtest_proxy_configuration_resolver_provider.cpp index d5d6f86f661..7bc48203998 100644 --- a/src/Common/tests/gtest_proxy_configuration_resolver_provider.cpp +++ b/src/Common/tests/gtest_proxy_configuration_resolver_provider.cpp @@ -1,6 +1,9 @@ #include #include +#include +#include +#include #include #include @@ -25,27 +28,19 @@ protected: DB::ContextMutablePtr ProxyConfigurationResolverProviderTests::context; -Poco::URI http_env_proxy_server = Poco::URI("http://http_environment_proxy:3128"); -Poco::URI https_env_proxy_server = Poco::URI("http://https_environment_proxy:3128"); - Poco::URI http_list_proxy_server = Poco::URI("http://http_list_proxy:3128"); Poco::URI https_list_proxy_server = Poco::URI("http://https_list_proxy:3128"); TEST_F(ProxyConfigurationResolverProviderTests, EnvironmentResolverShouldBeUsedIfNoSettings) { - EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server); + EnvironmentProxySetter setter; const auto & config = getContext().context->getConfigRef(); - auto http_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, config)->resolve(); - auto https_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, config)->resolve(); + auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, 
config); + auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, config); - ASSERT_EQ(http_configuration.host, http_env_proxy_server.getHost()); - ASSERT_EQ(http_configuration.port, http_env_proxy_server.getPort()); - ASSERT_EQ(http_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_env_proxy_server.getScheme())); - - ASSERT_EQ(https_configuration.host, https_env_proxy_server.getHost()); - ASSERT_EQ(https_configuration.port, https_env_proxy_server.getPort()); - ASSERT_EQ(https_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_env_proxy_server.getScheme())); + ASSERT_TRUE(std::dynamic_pointer_cast(http_resolver)); + ASSERT_TRUE(std::dynamic_pointer_cast(https_resolver)); } TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPOnly) @@ -57,17 +52,11 @@ TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPOnly) config->setString("proxy.http.uri", http_list_proxy_server.toString()); context->setConfig(config); - auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config)->resolve(); + auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config); + auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config); - ASSERT_EQ(http_proxy_configuration.host, http_list_proxy_server.getHost()); - ASSERT_EQ(http_proxy_configuration.port, http_list_proxy_server.getPort()); - ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_list_proxy_server.getScheme())); - - auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config)->resolve(); - - // No https configuration since it's not set - ASSERT_EQ(https_proxy_configuration.host, ""); - ASSERT_EQ(https_proxy_configuration.port, 0); + 
ASSERT_TRUE(std::dynamic_pointer_cast(http_resolver)); + ASSERT_TRUE(std::dynamic_pointer_cast(https_resolver)); } TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPSOnly) @@ -79,18 +68,11 @@ TEST_F(ProxyConfigurationResolverProviderTests, ListHTTPSOnly) config->setString("proxy.https.uri", https_list_proxy_server.toString()); context->setConfig(config); - auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config)->resolve(); + auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config); + auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config); - ASSERT_EQ(http_proxy_configuration.host, ""); - ASSERT_EQ(http_proxy_configuration.port, 0); - - auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config)->resolve(); - - ASSERT_EQ(https_proxy_configuration.host, https_list_proxy_server.getHost()); - - // still HTTP because the proxy host is not HTTPS - ASSERT_EQ(https_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_list_proxy_server.getScheme())); - ASSERT_EQ(https_proxy_configuration.port, https_list_proxy_server.getPort()); + ASSERT_TRUE(std::dynamic_pointer_cast(http_resolver)); + ASSERT_TRUE(std::dynamic_pointer_cast(https_resolver)); } TEST_F(ProxyConfigurationResolverProviderTests, ListBoth) @@ -107,70 +89,15 @@ TEST_F(ProxyConfigurationResolverProviderTests, ListBoth) context->setConfig(config); - auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config)->resolve(); + auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config); + auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config); - 
ASSERT_EQ(http_proxy_configuration.host, http_list_proxy_server.getHost()); - ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_list_proxy_server.getScheme())); - ASSERT_EQ(http_proxy_configuration.port, http_list_proxy_server.getPort()); - - auto https_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config)->resolve(); - - ASSERT_EQ(https_proxy_configuration.host, https_list_proxy_server.getHost()); - - // still HTTP because the proxy host is not HTTPS - ASSERT_EQ(https_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_list_proxy_server.getScheme())); - ASSERT_EQ(https_proxy_configuration.port, https_list_proxy_server.getPort()); -} - -TEST_F(ProxyConfigurationResolverProviderTests, RemoteResolverIsBasedOnProtocolConfigurationHTTP) -{ - /* - * Since there is no way to call `ProxyConfigurationResolver::resolve` on remote resolver, - * it is hard to verify the remote resolver was actually picked. One hackish way to assert - * the remote resolver was OR was not picked based on the configuration, is to use the - * environment resolver. Since the environment resolver is always returned as a fallback, - * we can assert the remote resolver was not picked if `ProxyConfigurationResolver::resolve` - * succeeds and returns an environment proxy configuration. 
- * */ - EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server); - - ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration()); - - config->setString("proxy", ""); - config->setString("proxy.https", ""); - config->setString("proxy.https.resolver", ""); - config->setString("proxy.https.resolver.endpoint", "http://resolver:8080/hostname"); - - // even tho proxy protocol / scheme is http, it should not be picked (prior to this PR, it would be picked) - config->setString("proxy.https.resolver.proxy_scheme", "http"); - config->setString("proxy.https.resolver.proxy_port", "80"); - config->setString("proxy.https.resolver.proxy_cache_time", "10"); - - context->setConfig(config); - - auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config)->resolve(); - - /* - * Asserts env proxy is used and not the remote resolver. If the remote resolver is picked, it is an error because - * there is no `http` specification for remote resolver - * */ - ASSERT_EQ(http_proxy_configuration.host, http_env_proxy_server.getHost()); - ASSERT_EQ(http_proxy_configuration.port, http_env_proxy_server.getPort()); - ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(http_env_proxy_server.getScheme())); + ASSERT_TRUE(std::dynamic_pointer_cast(http_resolver)); + ASSERT_TRUE(std::dynamic_pointer_cast(https_resolver)); } TEST_F(ProxyConfigurationResolverProviderTests, RemoteResolverIsBasedOnProtocolConfigurationHTTPS) { - /* - * Since there is no way to call `ProxyConfigurationResolver::resolve` on remote resolver, - * it is hard to verify the remote resolver was actually picked. One hackish way to assert - * the remote resolver was OR was not picked based on the configuration, is to use the - * environment resolver. 
Since the environment resolver is always returned as a fallback, - * we can assert the remote resolver was not picked if `ProxyConfigurationResolver::resolve` - * succeeds and returns an environment proxy configuration. - * */ - EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server); - ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration()); config->setString("proxy", ""); @@ -185,27 +112,44 @@ TEST_F(ProxyConfigurationResolverProviderTests, RemoteResolverIsBasedOnProtocolC context->setConfig(config); - auto http_proxy_configuration = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config)->resolve(); + auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config); + auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config); - /* - * Asserts env proxy is used and not the remote resolver. If the remote resolver is picked, it is an error because - * there is no `http` specification for remote resolver - * */ - ASSERT_EQ(http_proxy_configuration.host, https_env_proxy_server.getHost()); - ASSERT_EQ(http_proxy_configuration.port, https_env_proxy_server.getPort()); - ASSERT_EQ(http_proxy_configuration.protocol, DB::ProxyConfiguration::protocolFromString(https_env_proxy_server.getScheme())); + ASSERT_TRUE(std::dynamic_pointer_cast(http_resolver)); + ASSERT_TRUE(std::dynamic_pointer_cast(https_resolver)); } -// remote resolver is tricky to be tested in unit tests +TEST_F(ProxyConfigurationResolverProviderTests, RemoteResolverHTTPSOnly) +{ + ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration()); + + config->setString("proxy", ""); + config->setString("proxy.https", ""); + config->setString("proxy.https.resolver", ""); + config->setString("proxy.https.resolver.endpoint", "http://resolver:8080/hostname"); + + // even tho proxy protocol / scheme is http, it should not 
be picked (prior to this PR, it would be picked) + config->setString("proxy.https.resolver.proxy_scheme", "http"); + config->setString("proxy.https.resolver.proxy_port", "80"); + config->setString("proxy.https.resolver.proxy_cache_time", "10"); + + context->setConfig(config); + + auto http_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTP, *config); + auto https_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::Protocol::HTTPS, *config); + + ASSERT_TRUE(std::dynamic_pointer_cast(http_resolver)); + ASSERT_TRUE(std::dynamic_pointer_cast(https_resolver)); +} template void test_tunneling(DB::ContextMutablePtr context) { - EnvironmentProxySetter setter(http_env_proxy_server, https_env_proxy_server); - ConfigurationPtr config = Poco::AutoPtr(new Poco::Util::MapConfiguration()); config->setString("proxy", ""); + config->setString("proxy.https", ""); + config->setString("proxy.https.uri", http_list_proxy_server.toString()); if constexpr (STRING) { @@ -230,4 +174,3 @@ TEST_F(ProxyConfigurationResolverProviderTests, TunnelingForHTTPSRequestsOverHTT test_tunneling(context); test_tunneling(context); } - diff --git a/src/Common/tests/gtest_proxy_environment_configuration.cpp b/src/Common/tests/gtest_proxy_environment_configuration.cpp index 377bef385f6..708c7194785 100644 --- a/src/Common/tests/gtest_proxy_environment_configuration.cpp +++ b/src/Common/tests/gtest_proxy_environment_configuration.cpp @@ -2,81 +2,38 @@ #include #include +#include #include namespace DB { -namespace +TEST(EnvironmentProxyConfigurationResolver, TestHTTPandHTTPS) { - auto http_proxy_server = Poco::URI("http://proxy_server:3128"); - auto https_proxy_server = Poco::URI("https://proxy_server:3128"); -} + const auto http_proxy_server = Poco::URI(EnvironmentProxySetter::HTTP_PROXY); + const auto https_proxy_server = Poco::URI(EnvironmentProxySetter::HTTPS_PROXY); -TEST(EnvironmentProxyConfigurationResolver, TestHTTP) -{ - 
EnvironmentProxySetter setter(http_proxy_server, {}); + std::string poco_no_proxy_regex = buildPocoNonProxyHosts(EnvironmentProxySetter::NO_PROXY); - EnvironmentProxyConfigurationResolver resolver(ProxyConfiguration::Protocol::HTTP); + EnvironmentProxySetter setter; - auto configuration = resolver.resolve(); + EnvironmentProxyConfigurationResolver http_resolver(ProxyConfiguration::Protocol::HTTP); - ASSERT_EQ(configuration.host, http_proxy_server.getHost()); - ASSERT_EQ(configuration.port, http_proxy_server.getPort()); - ASSERT_EQ(configuration.protocol, ProxyConfiguration::protocolFromString(http_proxy_server.getScheme())); -} + auto http_configuration = http_resolver.resolve(); -TEST(EnvironmentProxyConfigurationResolver, TestHTTPNoEnv) -{ - EnvironmentProxyConfigurationResolver resolver(ProxyConfiguration::Protocol::HTTP); + ASSERT_EQ(http_configuration.host, http_proxy_server.getHost()); + ASSERT_EQ(http_configuration.port, http_proxy_server.getPort()); + ASSERT_EQ(http_configuration.protocol, ProxyConfiguration::protocolFromString(http_proxy_server.getScheme())); + ASSERT_EQ(http_configuration.no_proxy_hosts, poco_no_proxy_regex); - auto configuration = resolver.resolve(); + EnvironmentProxyConfigurationResolver https_resolver(ProxyConfiguration::Protocol::HTTPS); - ASSERT_EQ(configuration.host, ""); - ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP); - ASSERT_EQ(configuration.port, 0u); -} + auto https_configuration = https_resolver.resolve(); -TEST(EnvironmentProxyConfigurationResolver, TestHTTPs) -{ - EnvironmentProxySetter setter({}, https_proxy_server); - - EnvironmentProxyConfigurationResolver resolver(ProxyConfiguration::Protocol::HTTPS); - - auto configuration = resolver.resolve(); - - ASSERT_EQ(configuration.host, https_proxy_server.getHost()); - ASSERT_EQ(configuration.port, https_proxy_server.getPort()); - ASSERT_EQ(configuration.protocol, ProxyConfiguration::protocolFromString(https_proxy_server.getScheme())); -} - 
-TEST(EnvironmentProxyConfigurationResolver, TestHTTPsNoEnv) -{ - EnvironmentProxyConfigurationResolver resolver(ProxyConfiguration::Protocol::HTTPS); - - auto configuration = resolver.resolve(); - - ASSERT_EQ(configuration.host, ""); - ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP); - ASSERT_EQ(configuration.port, 0u); -} - -TEST(EnvironmentProxyConfigurationResolver, TestHTTPsOverHTTPTunnelingDisabled) -{ - // use http proxy for https, this would use connect protocol by default - EnvironmentProxySetter setter({}, http_proxy_server); - - bool disable_tunneling_for_https_requests_over_http_proxy = true; - - EnvironmentProxyConfigurationResolver resolver( - ProxyConfiguration::Protocol::HTTPS, disable_tunneling_for_https_requests_over_http_proxy); - - auto configuration = resolver.resolve(); - - ASSERT_EQ(configuration.host, http_proxy_server.getHost()); - ASSERT_EQ(configuration.port, http_proxy_server.getPort()); - ASSERT_EQ(configuration.protocol, ProxyConfiguration::protocolFromString(http_proxy_server.getScheme())); - ASSERT_EQ(configuration.tunneling, false); + ASSERT_EQ(https_configuration.host, https_proxy_server.getHost()); + ASSERT_EQ(https_configuration.port, https_proxy_server.getPort()); + ASSERT_EQ(https_configuration.protocol, ProxyConfiguration::protocolFromString(https_proxy_server.getScheme())); + ASSERT_EQ(https_configuration.no_proxy_hosts, poco_no_proxy_regex); } } diff --git a/src/Common/tests/gtest_proxy_list_configuration_resolver.cpp b/src/Common/tests/gtest_proxy_list_configuration_resolver.cpp index 3234fe0ccd1..5d8268eb206 100644 --- a/src/Common/tests/gtest_proxy_list_configuration_resolver.cpp +++ b/src/Common/tests/gtest_proxy_list_configuration_resolver.cpp @@ -10,6 +10,8 @@ namespace { auto proxy_server1 = Poco::URI("http://proxy_server1:3128"); auto proxy_server2 = Poco::URI("http://proxy_server2:3128"); + + std::string no_proxy_hosts = "localhost,,127.0.0.1,some_other_domain,,,, sub-domain.domain.com,"; } 
TEST(ProxyListConfigurationResolver, SimpleTest) @@ -17,7 +19,8 @@ TEST(ProxyListConfigurationResolver, SimpleTest) ProxyListConfigurationResolver resolver( {proxy_server1, proxy_server2}, - ProxyConfiguration::Protocol::HTTP); + ProxyConfiguration::Protocol::HTTP, + no_proxy_hosts); auto configuration1 = resolver.resolve(); auto configuration2 = resolver.resolve(); @@ -25,10 +28,12 @@ TEST(ProxyListConfigurationResolver, SimpleTest) ASSERT_EQ(configuration1.host, proxy_server1.getHost()); ASSERT_EQ(configuration1.port, proxy_server1.getPort()); ASSERT_EQ(configuration1.protocol, ProxyConfiguration::protocolFromString(proxy_server1.getScheme())); + ASSERT_EQ(configuration1.no_proxy_hosts, no_proxy_hosts); ASSERT_EQ(configuration2.host, proxy_server2.getHost()); ASSERT_EQ(configuration2.port, proxy_server2.getPort()); ASSERT_EQ(configuration2.protocol, ProxyConfiguration::protocolFromString(proxy_server2.getScheme())); + ASSERT_EQ(configuration2.no_proxy_hosts, no_proxy_hosts); } TEST(ProxyListConfigurationResolver, HTTPSRequestsOverHTTPProxyDefault) @@ -36,7 +41,8 @@ TEST(ProxyListConfigurationResolver, HTTPSRequestsOverHTTPProxyDefault) ProxyListConfigurationResolver resolver( {proxy_server1, proxy_server2}, - ProxyConfiguration::Protocol::HTTPS); + ProxyConfiguration::Protocol::HTTPS, + ""); auto configuration1 = resolver.resolve(); auto configuration2 = resolver.resolve(); @@ -45,11 +51,12 @@ TEST(ProxyListConfigurationResolver, HTTPSRequestsOverHTTPProxyDefault) ASSERT_EQ(configuration1.port, proxy_server1.getPort()); ASSERT_EQ(configuration1.protocol, ProxyConfiguration::protocolFromString(proxy_server1.getScheme())); ASSERT_EQ(configuration1.tunneling, true); + ASSERT_EQ(configuration1.no_proxy_hosts, ""); ASSERT_EQ(configuration2.host, proxy_server2.getHost()); ASSERT_EQ(configuration2.port, proxy_server2.getPort()); ASSERT_EQ(configuration2.protocol, ProxyConfiguration::protocolFromString(proxy_server2.getScheme())); - ASSERT_EQ(configuration1.tunneling, 
true); + ASSERT_EQ(configuration2.no_proxy_hosts, ""); } TEST(ProxyListConfigurationResolver, SimpleTestTunnelingDisabled) @@ -58,6 +65,7 @@ TEST(ProxyListConfigurationResolver, SimpleTestTunnelingDisabled) ProxyListConfigurationResolver resolver( {proxy_server1, proxy_server2}, ProxyConfiguration::Protocol::HTTPS, + "", disable_tunneling_for_https_requests_over_http_proxy); auto configuration1 = resolver.resolve(); diff --git a/src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp b/src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp index 7068e0f2061..5489a931f24 100644 --- a/src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp +++ b/src/Common/tests/gtest_proxy_remote_configuration_resolver.cpp @@ -42,6 +42,7 @@ TEST(RemoteProxyConfigurationResolver, HTTPOverHTTP) RemoteProxyConfigurationResolver resolver( remote_server_configuration, ProxyConfiguration::Protocol::HTTP, + "", std::make_shared(proxy_server_mock) ); @@ -68,6 +69,7 @@ TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPS) RemoteProxyConfigurationResolver resolver( remote_server_configuration, ProxyConfiguration::Protocol::HTTPS, + "", std::make_shared(proxy_server_mock) ); @@ -95,6 +97,7 @@ TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTP) RemoteProxyConfigurationResolver resolver( remote_server_configuration, ProxyConfiguration::Protocol::HTTPS, + "", std::make_shared(proxy_server_mock) ); @@ -122,6 +125,7 @@ TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPNoTunneling) RemoteProxyConfigurationResolver resolver( remote_server_configuration, ProxyConfiguration::Protocol::HTTPS, + "", std::make_shared(proxy_server_mock), true /* disable_tunneling_for_https_requests_over_http_proxy_ */ ); @@ -153,6 +157,7 @@ TEST(RemoteProxyConfigurationResolver, SimpleCacheTest) RemoteProxyConfigurationResolver resolver( remote_server_configuration, ProxyConfiguration::Protocol::HTTP, + "", fetcher_mock ); diff --git a/src/Common/tests/gtest_resolve_pool.cpp 
b/src/Common/tests/gtest_resolve_pool.cpp index 2391fc8bacf..b760b9b1524 100644 --- a/src/Common/tests/gtest_resolve_pool.cpp +++ b/src/Common/tests/gtest_resolve_pool.cpp @@ -1,12 +1,39 @@ #include -#include #include #include -#include "base/defines.h" +#include + +#include #include +#include #include -#include + + +using namespace std::literals::chrono_literals; + + +auto now() +{ + return std::chrono::steady_clock::now(); +} + +void sleep_until(auto time_point) +{ + std::this_thread::sleep_until(time_point); +} + +void sleep_for(auto duration) +{ + std::this_thread::sleep_for(duration); +} + +size_t toMilliseconds(auto duration) +{ + return std::chrono::duration_cast(duration).count(); +} + +const auto epsilon = 500us; class ResolvePoolMock : public DB::HostResolver { @@ -267,13 +294,14 @@ TEST_F(ResolvePoolTest, CanFailAndHeal) TEST_F(ResolvePoolTest, CanExpire) { - auto resolver = make_resolver(); + auto history = 5ms; + auto resolver = make_resolver(toMilliseconds(history)); auto expired_addr = resolver->resolve(); ASSERT_TRUE(addresses.contains(*expired_addr)); - addresses.erase(*expired_addr); - sleepForSeconds(1); + + sleep_for(history + epsilon); for (size_t i = 0; i < 1000; ++i) { @@ -310,12 +338,19 @@ TEST_F(ResolvePoolTest, DuplicatesInAddresses) ASSERT_EQ(3, DB::CurrentThread::getProfileEvents()[metrics.discovered]); } -void check_no_failed_address(size_t iteration, auto & resolver, auto & addresses, auto & failed_addr, auto & metrics) +void check_no_failed_address(size_t iteration, auto & resolver, auto & addresses, auto & failed_addr, auto & metrics, auto deadline) { ASSERT_EQ(iteration, DB::CurrentThread::getProfileEvents()[metrics.failed]); for (size_t i = 0; i < 100; ++i) { auto next_addr = resolver->resolve(); + + if (now() > deadline) + { + ASSERT_NE(i, 0); + break; + } + ASSERT_TRUE(addresses.contains(*next_addr)); ASSERT_NE(*next_addr, *failed_addr); } @@ -323,52 +358,60 @@ void check_no_failed_address(size_t iteration, auto & resolver, auto 
& addresses TEST_F(ResolvePoolTest, BannedForConsiquenceFail) { - size_t history_ms = 5; - auto resolver = make_resolver(history_ms); - + auto history = 5ms; + auto resolver = make_resolver(toMilliseconds(history)); auto failed_addr = resolver->resolve(); ASSERT_TRUE(addresses.contains(*failed_addr)); + auto start_at = now(); + failed_addr.setFail(); ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count)); - check_no_failed_address(1, resolver, addresses, failed_addr, metrics); + check_no_failed_address(1, resolver, addresses, failed_addr, metrics, start_at + history - epsilon); + + sleep_until(start_at + history + epsilon); + start_at = now(); - sleepForMilliseconds(history_ms + 1); resolver->update(); ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count)); failed_addr.setFail(); - check_no_failed_address(2, resolver, addresses, failed_addr, metrics); + check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon); + + sleep_until(start_at + history + epsilon); + start_at = now(); - sleepForMilliseconds(history_ms + 1); resolver->update(); ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count)); // ip still banned adter history_ms + update, because it was his second consiquent fail - check_no_failed_address(2, resolver, addresses, failed_addr, metrics); + check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon); } TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail) { - size_t history_ms = 5; - auto resolver = make_resolver(history_ms); + auto history = 5ms; + auto resolver = make_resolver(toMilliseconds(history)); auto failed_addr = resolver->resolve(); ASSERT_TRUE(addresses.contains(*failed_addr)); + auto start_at = now(); + failed_addr.setFail(); failed_addr.setFail(); failed_addr.setFail(); ASSERT_EQ(3, 
CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count)); - check_no_failed_address(3, resolver, addresses, failed_addr, metrics); + check_no_failed_address(3, resolver, addresses, failed_addr, metrics, start_at + history - epsilon); + + sleep_until(start_at + history + epsilon); - sleepForMilliseconds(history_ms + 1); resolver->update(); // ip is cleared after just 1 history_ms interval. ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); @@ -377,8 +420,8 @@ TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail) TEST_F(ResolvePoolTest, StillBannedAfterSuccess) { - size_t history_ms = 5; - auto resolver = make_resolver(history_ms); + auto history = 5ms; + auto resolver = make_resolver(toMilliseconds(history)); auto failed_addr = resolver->resolve(); ASSERT_TRUE(addresses.contains(*failed_addr)); @@ -395,11 +438,12 @@ TEST_F(ResolvePoolTest, StillBannedAfterSuccess) } chassert(again_addr); + auto start_at = now(); failed_addr.setFail(); ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count)); - check_no_failed_address(1, resolver, addresses, failed_addr, metrics); + check_no_failed_address(1, resolver, addresses, failed_addr, metrics, start_at + history - epsilon); again_addr = std::nullopt; // success; diff --git a/src/Common/threadPoolCallbackRunner.h b/src/Common/threadPoolCallbackRunner.h index 5beec660801..afbdcf2df19 100644 --- a/src/Common/threadPoolCallbackRunner.h +++ b/src/Common/threadPoolCallbackRunner.h @@ -54,7 +54,6 @@ ThreadPoolCallbackRunnerUnsafe threadPoolCallbackRunnerUnsafe( auto future = task->get_future(); - /// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority". /// Note: calling method scheduleOrThrowOnError in intentional, because we don't want to throw exceptions /// in critical places where this callback runner is used (e.g. 
loading or deletion of parts) my_pool->scheduleOrThrowOnError([my_task = std::move(task)]{ (*my_task)(); }, priority); @@ -163,7 +162,6 @@ public: task->future = task_func->get_future(); - /// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority". /// Note: calling method scheduleOrThrowOnError in intentional, because we don't want to throw exceptions /// in critical places where this callback runner is used (e.g. loading or deletion of parts) pool.scheduleOrThrowOnError([my_task = std::move(task_func)]{ (*my_task)(); }, priority); diff --git a/src/Compression/CompressedWriteBuffer.cpp b/src/Compression/CompressedWriteBuffer.cpp index f16330332ab..83c9fbc9573 100644 --- a/src/Compression/CompressedWriteBuffer.cpp +++ b/src/Compression/CompressedWriteBuffer.cpp @@ -57,14 +57,16 @@ void CompressedWriteBuffer::nextImpl() } } -CompressedWriteBuffer::~CompressedWriteBuffer() -{ - finalize(); -} - CompressedWriteBuffer::CompressedWriteBuffer(WriteBuffer & out_, CompressionCodecPtr codec_, size_t buf_size) : BufferWithOwnMemory(buf_size), out(out_), codec(std::move(codec_)) { } +CompressedWriteBuffer::~CompressedWriteBuffer() +{ + if (!canceled) + finalize(); +} + + } diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index ad6f95b3902..9607c345a3b 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -808,7 +808,11 @@ void LogEntryStorage::startCommitLogsPrefetch(uint64_t last_committed_index) con for (; current_index <= max_index_for_prefetch; ++current_index) { - const auto & [changelog_description, position, size] = logs_location.at(current_index); + auto location_it = logs_location.find(current_index); + if (location_it == logs_location.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Location of log entry with index {} is missing", current_index); + + const auto & [changelog_description, position, size] = location_it->second; if (total_size == 0) current_file_info = 
&file_infos.emplace_back(changelog_description, position, /* count */ 1); else if (total_size + size > commit_logs_cache.size_threshold) @@ -1416,7 +1420,11 @@ LogEntriesPtr LogEntryStorage::getLogEntriesBetween(uint64_t start, uint64_t end } else { - const auto & log_location = logs_location.at(i); + auto location_it = logs_location.find(i); + if (location_it == logs_location.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Location of log entry with index {} is missing", i); + + const auto & log_location = location_it->second; if (!read_info) set_new_file(log_location); diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index 05f691ca76b..d72d39fd7e1 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -169,6 +169,23 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const writeText("async_replication=", buf); write_bool(coordination_settings->async_replication); + + writeText("latest_logs_cache_size_threshold=", buf); + write_int(coordination_settings->latest_logs_cache_size_threshold); + writeText("commit_logs_cache_size_threshold=", buf); + write_int(coordination_settings->commit_logs_cache_size_threshold); + + writeText("disk_move_retries_wait_ms=", buf); + write_int(coordination_settings->disk_move_retries_wait_ms); + writeText("disk_move_retries_during_init=", buf); + write_int(coordination_settings->disk_move_retries_during_init); + + writeText("log_slow_total_threshold_ms=", buf); + write_int(coordination_settings->log_slow_total_threshold_ms); + writeText("log_slow_cpu_threshold_ms=", buf); + write_int(coordination_settings->log_slow_cpu_threshold_ms); + writeText("log_slow_connection_operation_threshold_ms=", buf); + write_int(coordination_settings->log_slow_connection_operation_threshold_ms); } KeeperConfigurationAndSettingsPtr diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h 
index 4a898d1975c..5116a814871 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -59,7 +59,10 @@ struct Settings; M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \ M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \ M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \ - M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) + M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \ + M(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this settings will be logged", 0) \ + M(UInt64, log_slow_cpu_threshold_ms, 100, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \ + M(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0) DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) diff --git a/src/Coordination/KeeperConstants.cpp b/src/Coordination/KeeperConstants.cpp index 8251dca3d1e..b4241235cc7 100644 --- a/src/Coordination/KeeperConstants.cpp +++ b/src/Coordination/KeeperConstants.cpp @@ -150,12 +150,18 @@ M(S3PutObject) \ M(S3GetObject) \ \ - M(AzureUploadPart) \ - M(DiskAzureUploadPart) \ + M(AzureUpload) \ + M(DiskAzureUpload) \ + M(AzureStageBlock) \ + M(DiskAzureStageBlock) \ + M(AzureCommitBlockList) \ + M(DiskAzureCommitBlockList) \ M(AzureCopyObject) \ M(DiskAzureCopyObject) 
\ M(AzureDeleteObjects) \ + M(DiskAzureDeleteObjects) \ M(AzureListObjects) \ + M(DiskAzureListObjects) \ \ M(DiskS3DeleteObjects) \ M(DiskS3CopyObject) \ @@ -238,6 +244,13 @@ M(KeeperPacketsReceived) \ M(KeeperRequestTotal) \ M(KeeperLatency) \ + M(KeeperTotalElapsedMicroseconds) \ + M(KeeperProcessElapsedMicroseconds) \ + M(KeeperPreprocessElapsedMicroseconds) \ + M(KeeperStorageLockWaitMicroseconds) \ + M(KeeperCommitWaitElapsedMicroseconds) \ + M(KeeperBatchMaxCount) \ + M(KeeperBatchMaxTotalSize) \ M(KeeperCommits) \ M(KeeperCommitsFailed) \ M(KeeperSnapshotCreations) \ @@ -258,7 +271,8 @@ M(KeeperExistsRequest) \ \ M(IOUringSQEsSubmitted) \ - M(IOUringSQEsResubmits) \ + M(IOUringSQEsResubmitsAsync) \ + M(IOUringSQEsResubmitsSync) \ M(IOUringCQEsCompleted) \ M(IOUringCQEsFailed) \ \ diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index fd23ab99be1..c7d49f16c93 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -31,6 +31,13 @@ namespace CurrentMetrics extern const Metric KeeperOutstandingRequets; } +namespace ProfileEvents +{ + extern const Event KeeperCommitWaitElapsedMicroseconds; + extern const Event KeeperBatchMaxCount; + extern const Event KeeperBatchMaxTotalSize; +} + using namespace std::chrono_literals; namespace DB @@ -119,6 +126,7 @@ void KeeperDispatcher::requestThread() auto coordination_settings = configuration_and_settings->coordination_settings; uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds(); uint64_t max_batch_bytes_size = coordination_settings->max_requests_batch_bytes_size; + size_t max_batch_size = coordination_settings->max_requests_batch_size; /// The code below do a very simple thing: batch all write (quorum) requests into vector until /// previous write batch is not finished or max_batch size achieved. 
The main complexity goes from @@ -188,7 +196,6 @@ void KeeperDispatcher::requestThread() return false; }; - size_t max_batch_size = coordination_settings->max_requests_batch_size; while (!shutdown_called && current_batch.size() < max_batch_size && !has_reconfig_request && current_batch_bytes_size < max_batch_bytes_size && try_get_request()) ; @@ -225,6 +232,12 @@ void KeeperDispatcher::requestThread() /// Process collected write requests batch if (!current_batch.empty()) { + if (current_batch.size() == max_batch_size) + ProfileEvents::increment(ProfileEvents::KeeperBatchMaxCount, 1); + + if (current_batch_bytes_size == max_batch_bytes_size) + ProfileEvents::increment(ProfileEvents::KeeperBatchMaxTotalSize, 1); + LOG_TRACE(log, "Processing requests batch, size: {}, bytes: {}", current_batch.size(), current_batch_bytes_size); auto result = server->putRequestBatch(current_batch); @@ -243,6 +256,8 @@ void KeeperDispatcher::requestThread() /// If we will execute read or reconfig next, we have to process result now if (execute_requests_after_write) { + Stopwatch watch; + SCOPE_EXIT(ProfileEvents::increment(ProfileEvents::KeeperCommitWaitElapsedMicroseconds, watch.elapsedMicroseconds())); if (prev_result) result_buf = forceWaitAndProcessResult( prev_result, prev_batch, /*clear_requests_on_success=*/!execute_requests_after_write); @@ -319,19 +334,13 @@ void KeeperDispatcher::snapshotThread() { setThreadName("KeeperSnpT"); const auto & shutdown_called = keeper_context->isShutdownCalled(); - while (!shutdown_called) + CreateSnapshotTask task; + while (snapshots_queue.pop(task)) { - CreateSnapshotTask task; - if (!snapshots_queue.pop(task)) - break; - try { auto snapshot_file_info = task.create_snapshot(std::move(task.snapshot), /*execute_only_cleanup=*/shutdown_called); - if (shutdown_called) - break; - if (!snapshot_file_info) continue; diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index bde59d77806..7352873107c 100644 --- 
a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -7,11 +7,12 @@ #include #include #include +#include #include #include -#include #include #include +#include #include #include #include @@ -27,7 +28,7 @@ #include #include #include -#include +#include #pragma clang diagnostic ignored "-Wdeprecated-declarations" #include @@ -380,6 +381,8 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co LockMemoryExceptionInThread::removeUniqueLock(); }; + asio_opts.thread_pool_size_ = getNumberOfPhysicalCPUCores(); + if (state_manager->isSecure()) { #if USE_SSL diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 9991bef7be5..66ac2be810e 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -64,7 +65,8 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo return; } - auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config); + const auto & settings = Context::getGlobalContextInstance()->getSettingsRef(); + auto auth_settings = S3::AuthSettings(config, settings, config_prefix); String endpoint = macros->expand(config.getString(config_prefix + ".endpoint")); auto new_uri = S3::URI{endpoint}; @@ -118,10 +120,10 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo std::move(headers), S3::CredentialsConfiguration { - auth_settings.use_environment_credentials.value_or(true), - auth_settings.use_insecure_imds_request.value_or(false), - auth_settings.expiration_window_seconds.value_or(S3::DEFAULT_EXPIRATION_WINDOW_SECONDS), - auth_settings.no_sign_request.value_or(false), + auth_settings.use_environment_credentials, + auth_settings.use_insecure_imds_request, + auth_settings.expiration_window_seconds, + auth_settings.no_sign_request, }, 
credentials.GetSessionToken()); @@ -154,7 +156,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh if (s3_client == nullptr) return; - S3Settings::RequestSettings request_settings_1; + S3::RequestSettings request_settings_1; const auto create_writer = [&](const auto & key) { @@ -197,7 +199,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh lock_writer.finalize(); // We read back the written UUID, if it's the same we can upload the file - S3Settings::RequestSettings request_settings_2; + S3::RequestSettings request_settings_2; request_settings_2.max_single_read_retries = 1; ReadBufferFromS3 lock_reader { diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 50d9db9dd3b..b25299ce02c 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -1,12 +1,14 @@ #include #include +#include +#include +#include #include #include -#include #include -#include #include #include +#include #include #include #include @@ -17,7 +19,6 @@ #include #include #include -#include namespace ProfileEvents @@ -31,6 +32,7 @@ namespace ProfileEvents extern const Event KeeperSnapshotApplysFailed; extern const Event KeeperReadSnapshot; extern const Event KeeperSaveSnapshot; + extern const Event KeeperStorageLockWaitMicroseconds; } namespace DB @@ -172,6 +174,20 @@ void assertDigest( } } +struct TSA_SCOPED_LOCKABLE LockGuardWithStats final +{ + std::unique_lock lock; + explicit LockGuardWithStats(std::mutex & mutex) TSA_ACQUIRE(mutex) + { + Stopwatch watch; + std::unique_lock l(mutex); + ProfileEvents::increment(ProfileEvents::KeeperStorageLockWaitMicroseconds, watch.elapsedMicroseconds()); + lock = std::move(l); + } + + ~LockGuardWithStats() TSA_RELEASE() = default; +}; + } template @@ -295,7 +311,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorageBase::RequestFor if (op_num == Coordination::OpNum::SessionID || op_num == 
Coordination::OpNum::Reconfig) return true; - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); if (storage->isFinalized()) return false; @@ -326,7 +342,7 @@ bool KeeperStateMachine::preprocess(const KeeperStorageBase::RequestFor template void KeeperStateMachine::reconfigure(const KeeperStorageBase::RequestForSession& request_for_session) { - std::lock_guard _(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); KeeperStorageBase::ResponseForSession response = processReconfiguration(request_for_session); if (!responses_queue.push(response)) { @@ -417,7 +433,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t l if (!keeper_context->localLogsPreprocessed() && !preprocess(*request_for_session)) return nullptr; - auto try_push = [this](const KeeperStorageBase::ResponseForSession& response) + auto try_push = [&](const KeeperStorageBase::ResponseForSession& response) { if (!responses_queue.push(response)) { @@ -426,6 +442,17 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t l "Failed to push response with session id {} to the queue, probably because of shutdown", response.session_id); } + + using namespace std::chrono; + uint64_t elapsed = duration_cast(system_clock::now().time_since_epoch()).count() - request_for_session->time; + if (elapsed > keeper_context->getCoordinationSettings()->log_slow_total_threshold_ms) + { + LOG_INFO( + log, + "Total time to process a request took too long ({}ms).\nRequest info: {}", + elapsed, + request_for_session->request->toString(/*short_format=*/true)); + } }; try @@ -443,7 +470,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t l response_for_session.session_id = -1; response_for_session.response = response; - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); session_id = storage->getSessionID(session_id_request.session_timeout_ms); LOG_DEBUG(log, "Session ID response {} 
with timeout {}", session_id, session_id_request.session_timeout_ms); response->session_id = session_id; @@ -452,12 +479,13 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t l else { if (op_num == Coordination::OpNum::Close) + { std::lock_guard lock(request_cache_mutex); parsed_request_cache.erase(request_for_session->session_id); } - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); KeeperStorageBase::ResponsesForSessions responses_for_sessions = storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid); for (auto & response_for_session : responses_for_sessions) @@ -509,7 +537,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) } { /// deserialize and apply snapshot to storage - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); SnapshotDeserializationResult snapshot_deserialization_result; if (latest_snapshot_ptr) @@ -562,7 +590,7 @@ void KeeperStateMachine::rollbackRequest(const KeeperStorageBase::Reque if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) return; - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); storage->rollbackRequest(request_for_session.zxid, allow_missing); } @@ -591,7 +619,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft:: auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf); CreateSnapshotTask snapshot_task; { /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking. 
- std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); snapshot_task.snapshot = std::make_shared>(storage.get(), snapshot_meta_copy, getClusterConfig()); } @@ -599,7 +627,7 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft:: snapshot_task.create_snapshot = [this, when_done](KeeperStorageSnapshotPtr && snapshot_, bool execute_only_cleanup) { nuraft::ptr exception(nullptr); - bool ret = true; + bool ret = false; auto && snapshot = std::get>>(std::move(snapshot_)); if (!execute_only_cleanup) { @@ -630,7 +658,8 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft:: else { auto snapshot_buf = snapshot_manager.serializeSnapshotToBuffer(*snapshot); - auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk(*snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); + auto snapshot_info = snapshot_manager.serializeSnapshotBufferToDisk( + *snapshot_buf, snapshot->snapshot_meta->get_last_log_idx()); latest_snapshot_info = std::move(snapshot_info); latest_snapshot_buf = std::move(snapshot_buf); } @@ -643,18 +672,19 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft:: latest_snapshot_info->path); } } + + ret = true; } catch (...) 
{ ProfileEvents::increment(ProfileEvents::KeeperSnapshotCreationsFailed); LOG_TRACE(log, "Exception happened during snapshot"); tryLogCurrentException(log); - ret = false; } } { /// Destroy snapshot with lock - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); LOG_TRACE(log, "Clearing garbage after snapshot"); /// Turn off "snapshot mode" and clear outdate part of storage state storage->clearGarbageAfterSnapshot(); @@ -797,7 +827,7 @@ template void KeeperStateMachine::processReadRequest(const KeeperStorageBase::RequestForSession & request_for_session) { /// Pure local request, just process it with storage - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); auto responses = storage->processRequest( request_for_session.request, request_for_session.session_id, std::nullopt, true /*check_acl*/, true /*is_local*/); for (const auto & response : responses) @@ -808,112 +838,112 @@ void KeeperStateMachine::processReadRequest(const KeeperStorageBase::Re template void KeeperStateMachine::shutdownStorage() { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); storage->finalize(); } template std::vector KeeperStateMachine::getDeadSessions() { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getDeadSessions(); } template int64_t KeeperStateMachine::getNextZxid() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getNextZXID(); } template KeeperStorageBase::Digest KeeperStateMachine::getNodesDigest() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getNodesDigest(false); } template uint64_t KeeperStateMachine::getLastProcessedZxid() const { - std::lock_guard 
lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getZXID(); } template uint64_t KeeperStateMachine::getNodesCount() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getNodesCount(); } template uint64_t KeeperStateMachine::getTotalWatchesCount() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getTotalWatchesCount(); } template uint64_t KeeperStateMachine::getWatchedPathsCount() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getWatchedPathsCount(); } template uint64_t KeeperStateMachine::getSessionsWithWatchesCount() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getSessionsWithWatchesCount(); } template uint64_t KeeperStateMachine::getTotalEphemeralNodesCount() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getTotalEphemeralNodesCount(); } template uint64_t KeeperStateMachine::getSessionWithEphemeralNodesCount() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getSessionWithEphemeralNodesCount(); } template void KeeperStateMachine::dumpWatches(WriteBufferFromOwnString & buf) const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); storage->dumpWatches(buf); } template void KeeperStateMachine::dumpWatchesByPath(WriteBufferFromOwnString & buf) const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); storage->dumpWatchesByPath(buf); } template void 
KeeperStateMachine::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); storage->dumpSessionsAndEphemerals(buf); } template uint64_t KeeperStateMachine::getApproximateDataSize() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getApproximateDataSize(); } template uint64_t KeeperStateMachine::getKeyArenaSize() const { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); return storage->getArenaDataSize(); } @@ -956,7 +986,7 @@ ClusterConfigPtr IKeeperStateMachine::getClusterConfig() const template void KeeperStateMachine::recalculateStorageStats() { - std::lock_guard lock(storage_and_responses_lock); + LockGuardWithStats lock(storage_and_responses_lock); LOG_INFO(log, "Recalculating storage stats"); storage->recalculateStats(); LOG_INFO(log, "Done recalculating storage stats"); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 3efa9da76c3..4305e3b173d 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -253,8 +253,7 @@ private: /// Save/Load and Serialize/Deserialize logic for snapshots. 
KeeperSnapshotManager snapshot_manager; - KeeperStorageBase::ResponseForSession processReconfiguration( - const KeeperStorageBase::RequestForSession& request_for_session) + KeeperStorageBase::ResponseForSession processReconfiguration(const KeeperStorageBase::RequestForSession & request_for_session) TSA_REQUIRES(storage_and_responses_lock) override; }; } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 685d03c4237..759e176b95c 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -41,6 +41,8 @@ namespace ProfileEvents extern const Event KeeperGetRequest; extern const Event KeeperListRequest; extern const Event KeeperExistsRequest; + extern const Event KeeperPreprocessElapsedMicroseconds; + extern const Event KeeperProcessElapsedMicroseconds; } namespace DB @@ -619,6 +621,10 @@ bool KeeperStorage::UncommittedState::hasACL(int64_t session_id, bool if (is_local) return check_auth(storage.session_and_auth[session_id]); + /// we want to close the session and with that we will remove all the auth related to the session + if (closed_sessions.contains(session_id)) + return false; + if (check_auth(storage.session_and_auth[session_id])) return true; @@ -645,6 +651,10 @@ void KeeperStorage::UncommittedState::addDelta(Delta new_delta) auto & uncommitted_auth = session_and_auth[auth_delta->session_id]; uncommitted_auth.emplace_back(&auth_delta->auth_id); } + else if (const auto * close_session_delta = std::get_if(&added_delta.operation)) + { + closed_sessions.insert(close_session_delta->session_id); + } } template @@ -697,7 +707,10 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid) uncommitted_auth.pop_front(); if (uncommitted_auth.empty()) session_and_auth.erase(add_auth->session_id); - + } + else if (auto * close_session = std::get_if(&front_delta.operation)) + { + closed_sessions.erase(close_session->session_id); } deltas.pop_front(); @@ -771,6 +784,10 @@ void 
KeeperStorage::UncommittedState::rollback(int64_t rollback_zxid) session_and_auth.erase(add_auth->session_id); } } + else if (auto * close_session = std::get_if(&delta_it->operation)) + { + closed_sessions.erase(close_session->session_id); + } } if (delta_it == deltas.rend()) @@ -974,6 +991,10 @@ Coordination::Error KeeperStorage::commit(int64_t commit_zxid) session_and_auth[operation.session_id].emplace_back(std::move(operation.auth_id)); return Coordination::Error::ZOK; } + else if constexpr (std::same_as) + { + return Coordination::Error::ZOK; + } else { // shouldn't be called in any process functions @@ -1118,9 +1139,11 @@ struct KeeperStorageHeartbeatRequestProcessor final : public KeeperStorageReques using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; Coordination::ZooKeeperResponsePtr - process(Storage & /* storage */, int64_t /* zxid */) const override + process(Storage & storage, int64_t zxid) const override { - return this->zk_request->makeResponse(); + Coordination::ZooKeeperResponsePtr response_ptr = this->zk_request->makeResponse(); + response_ptr->error = storage.commit(zxid); + return response_ptr; } }; @@ -2451,6 +2474,20 @@ void KeeperStorage::preprocessRequest( std::optional digest, int64_t log_idx) { + Stopwatch watch; + SCOPE_EXIT({ + auto elapsed = watch.elapsedMicroseconds(); + if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()->log_slow_cpu_threshold_ms) + { + LOG_INFO( + getLogger("KeeperStorage"), + "Preprocessing a request took too long ({}ms).\nRequest info: {}", + elapsed_ms, + zk_request->toString(/*short_format=*/true)); + } + ProfileEvents::increment(ProfileEvents::KeeperPreprocessElapsedMicroseconds, elapsed); + }); + if (!initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes are not initialized"); @@ -2530,6 +2567,7 @@ void KeeperStorage::preprocessRequest( ephemerals.erase(session_ephemerals); } + new_deltas.emplace_back(transaction.zxid, 
CloseSessionDelta{session_id}); new_digest = calculateNodesDigest(new_digest, new_deltas); return; } @@ -2551,6 +2589,20 @@ KeeperStorage::ResponsesForSessions KeeperStorage::process bool check_acl, bool is_local) { + Stopwatch watch; + SCOPE_EXIT({ + auto elapsed = watch.elapsedMicroseconds(); + if (auto elapsed_ms = elapsed / 1000; elapsed_ms > keeper_context->getCoordinationSettings()->log_slow_cpu_threshold_ms) + { + LOG_INFO( + getLogger("KeeperStorage"), + "Processing a request took too long ({}ms).\nRequest info: {}", + elapsed_ms, + zk_request->toString(/*short_format=*/true)); + } + ProfileEvents::increment(ProfileEvents::KeeperProcessElapsedMicroseconds, elapsed); + }); + if (!initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "KeeperStorage system nodes are not initialized"); @@ -2592,8 +2644,6 @@ KeeperStorage::ResponsesForSessions KeeperStorage::process } } - uncommitted_state.commit(zxid); - clearDeadWatches(session_id); auto auth_it = session_and_auth.find(session_id); if (auth_it != session_and_auth.end()) @@ -2638,7 +2688,6 @@ KeeperStorage::ResponsesForSessions KeeperStorage::process else { response = request_processor->process(*this, zxid); - uncommitted_state.commit(zxid); } /// Watches for this requests are added to the watches lists @@ -2678,6 +2727,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::process results.push_back(ResponseForSession{session_id, response}); } + uncommitted_state.commit(zxid); return results; } diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index c2fd196b95e..586ad115fd3 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -533,8 +533,13 @@ public: AuthID auth_id; }; + struct CloseSessionDelta + { + int64_t session_id; + }; + using Operation = std:: - variant; + variant; struct Delta { @@ -570,6 +575,7 @@ public: std::shared_ptr tryGetNodeFromStorage(StringRef path) const; std::unordered_map> session_and_auth; + std::unordered_set 
closed_sessions; struct UncommittedNode { diff --git a/src/Coordination/RaftServerConfig.h b/src/Coordination/RaftServerConfig.h index 0ecbd6464c1..37b6a92ba70 100644 --- a/src/Coordination/RaftServerConfig.h +++ b/src/Coordination/RaftServerConfig.h @@ -57,7 +57,7 @@ using ClusterUpdateActions = std::vector; template <> struct fmt::formatter : fmt::formatter { - constexpr auto format(const DB::RaftServerConfig & server, format_context & ctx) + constexpr auto format(const DB::RaftServerConfig & server, format_context & ctx) const { return fmt::format_to( ctx.out(), "server.{}={};{};{}", server.id, server.endpoint, server.learner ? "learner" : "participant", server.priority); @@ -67,7 +67,7 @@ struct fmt::formatter : fmt::formatter template <> struct fmt::formatter : fmt::formatter { - constexpr auto format(const DB::ClusterUpdateAction & action, format_context & ctx) + constexpr auto format(const DB::ClusterUpdateAction & action, format_context & ctx) const { if (const auto * add = std::get_if(&action)) return fmt::format_to(ctx.out(), "(Add server {})", add->id); diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h index 04d9af7ca38..85452558496 100644 --- a/src/Coordination/SnapshotableHashTable.h +++ b/src/Coordination/SnapshotableHashTable.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB { diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index 2af8a015c2d..2017adcc58d 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include @@ -146,7 +146,7 @@ struct ContextSharedPart : boost::noncopyable mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes - std::optional storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of 
S3 storage + std::optional storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage mutable std::mutex keeper_dispatcher_mutex; mutable std::shared_ptr keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex); @@ -455,14 +455,14 @@ std::shared_ptr Context::getZooKeeper() const throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper"); } -const StorageS3Settings & Context::getStorageS3Settings() const +const S3SettingsByEndpoint & Context::getStorageS3Settings() const { std::lock_guard lock(shared->mutex); if (!shared->storage_s3_settings) { const auto & config = shared->config ? *shared->config : Poco::Util::Application::instance().config(); - shared->storage_s3_settings.emplace().loadFromConfig("s3", config, getSettingsRef()); + shared->storage_s3_settings.emplace().loadFromConfig(config, "s3", getSettingsRef()); } return *shared->storage_s3_settings; diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h index 79a3e32a72d..d3bbfececed 100644 --- a/src/Coordination/Standalone/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -37,7 +37,7 @@ class FilesystemCacheLog; class FilesystemReadPrefetchesLog; class BlobStorageLog; class IOUringReader; -class StorageS3Settings; +class S3SettingsByEndpoint; /// A small class which owns ContextShared. /// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete. 
@@ -130,7 +130,8 @@ public: enum class ApplicationType : uint8_t { - KEEPER + KEEPER, + SERVER, }; void setApplicationType(ApplicationType) {} @@ -163,7 +164,7 @@ public: zkutil::ZooKeeperPtr getZooKeeper() const; - const StorageS3Settings & getStorageS3Settings() const; + const S3SettingsByEndpoint & getStorageS3Settings() const; const String & getUserName() const { static std::string user; return user; } diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 9f0937572a9..2f505873c65 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -2091,6 +2091,191 @@ TYPED_TEST(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecomm EXPECT_EQ(acls[0].permissions, 31); } +TYPED_TEST(CoordinationTest, TestPreprocessWhenCloseSessionIsPrecommitted) +{ + using namespace Coordination; + using namespace DB; + + ChangelogDirTest snapshots("./snapshots"); + this->setSnapshotDirectory("./snapshots"); + + using Storage = typename TestFixture::Storage; + + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + ResponsesQueue queue(std::numeric_limits::max()); + SnapshotsQueue snapshots_queue{1}; + int64_t session_without_auth = 1; + int64_t session_with_auth = 2; + size_t term = 0; + + auto state_machine = std::make_shared>(queue, snapshots_queue, this->keeper_context, nullptr); + state_machine->init(); + + auto & storage = state_machine->getStorageUnsafe(); + const auto & uncommitted_state = storage.uncommitted_state; + + auto auth_req = std::make_shared(); + auth_req->scheme = "digest"; + auth_req->data = "test_user:test_password"; + + // Add auth data to the session + auto auth_entry = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), auth_req); + state_machine->pre_commit(1, auth_entry->get_buf()); + state_machine->commit(1, auth_entry->get_buf()); + + std::string node_without_acl = 
"/node_without_acl"; + { + auto create_req = std::make_shared(); + create_req->path = node_without_acl; + create_req->data = "notmodified"; + auto create_entry = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), create_req); + state_machine->pre_commit(2, create_entry->get_buf()); + state_machine->commit(2, create_entry->get_buf()); + ASSERT_TRUE(storage.container.contains(node_without_acl)); + } + + std::string node_with_acl = "/node_with_acl"; + { + auto create_req = std::make_shared(); + create_req->path = node_with_acl; + create_req->data = "notmodified"; + create_req->acls = {{.permissions = ACL::All, .scheme = "auth", .id = ""}}; + auto create_entry = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), create_req); + state_machine->pre_commit(3, create_entry->get_buf()); + state_machine->commit(3, create_entry->get_buf()); + ASSERT_TRUE(storage.container.contains(node_with_acl)); + } + + auto set_req_with_acl = std::make_shared(); + set_req_with_acl->path = node_with_acl; + set_req_with_acl->data = "modified"; + + auto set_req_without_acl = std::make_shared(); + set_req_without_acl->path = node_without_acl; + set_req_without_acl->data = "modified"; + + const auto reset_node_value + = [&](const auto & path) { storage.container.updateValue(path, [](auto & node) { node.setData("notmodified"); }); }; + + auto close_req = std::make_shared(); + + { + SCOPED_TRACE("Session with Auth"); + + // test we can modify both nodes + auto set_entry = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), set_req_with_acl); + state_machine->pre_commit(5, set_entry->get_buf()); + state_machine->commit(5, set_entry->get_buf()); + ASSERT_TRUE(storage.container.find(node_with_acl)->value.getData() == "modified"); + reset_node_value(node_with_acl); + + set_entry = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), set_req_without_acl); + state_machine->pre_commit(6, 
set_entry->get_buf()); + state_machine->commit(6, set_entry->get_buf()); + ASSERT_TRUE(storage.container.find(node_without_acl)->value.getData() == "modified"); + reset_node_value(node_without_acl); + + auto close_entry = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), close_req); + + // Pre-commit close session + state_machine->pre_commit(7, close_entry->get_buf()); + + /// will be rejected because we don't have required auth + auto set_entry_with_acl = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), set_req_with_acl); + state_machine->pre_commit(8, set_entry_with_acl->get_buf()); + + /// will be accepted because no ACL + auto set_entry_without_acl = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), set_req_without_acl); + state_machine->pre_commit(9, set_entry_without_acl->get_buf()); + + ASSERT_TRUE(uncommitted_state.getNode(node_with_acl)->getData() == "notmodified"); + ASSERT_TRUE(uncommitted_state.getNode(node_without_acl)->getData() == "modified"); + + state_machine->rollback(9, set_entry_without_acl->get_buf()); + state_machine->rollback(8, set_entry_with_acl->get_buf()); + + // let's commit close and verify we get same outcome + state_machine->commit(7, close_entry->get_buf()); + + /// will be rejected because we don't have required auth + set_entry_with_acl = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), set_req_with_acl); + state_machine->pre_commit(8, set_entry_with_acl->get_buf()); + + /// will be accepted because no ACL + set_entry_without_acl = getLogEntryFromZKRequest(term, session_with_auth, state_machine->getNextZxid(), set_req_without_acl); + state_machine->pre_commit(9, set_entry_without_acl->get_buf()); + + ASSERT_TRUE(uncommitted_state.getNode(node_with_acl)->getData() == "notmodified"); + ASSERT_TRUE(uncommitted_state.getNode(node_without_acl)->getData() == "modified"); + + state_machine->commit(8, 
set_entry_with_acl->get_buf()); + state_machine->commit(9, set_entry_without_acl->get_buf()); + + ASSERT_TRUE(storage.container.find(node_with_acl)->value.getData() == "notmodified"); + ASSERT_TRUE(storage.container.find(node_without_acl)->value.getData() == "modified"); + + reset_node_value(node_without_acl); + } + + { + SCOPED_TRACE("Session without Auth"); + + // test we can modify only node without acl + auto set_entry = getLogEntryFromZKRequest(term, session_without_auth, state_machine->getNextZxid(), set_req_with_acl); + state_machine->pre_commit(10, set_entry->get_buf()); + state_machine->commit(10, set_entry->get_buf()); + ASSERT_TRUE(storage.container.find(node_with_acl)->value.getData() == "notmodified"); + + set_entry = getLogEntryFromZKRequest(term, session_without_auth, state_machine->getNextZxid(), set_req_without_acl); + state_machine->pre_commit(11, set_entry->get_buf()); + state_machine->commit(11, set_entry->get_buf()); + ASSERT_TRUE(storage.container.find(node_without_acl)->value.getData() == "modified"); + reset_node_value(node_without_acl); + + auto close_entry = getLogEntryFromZKRequest(term, session_without_auth, state_machine->getNextZxid(), close_req); + + // Pre-commit close session + state_machine->pre_commit(12, close_entry->get_buf()); + + /// will be rejected because we don't have required auth + auto set_entry_with_acl = getLogEntryFromZKRequest(term, session_without_auth, state_machine->getNextZxid(), set_req_with_acl); + state_machine->pre_commit(13, set_entry_with_acl->get_buf()); + + /// will be accepted because no ACL + auto set_entry_without_acl = getLogEntryFromZKRequest(term, session_without_auth, state_machine->getNextZxid(), set_req_without_acl); + state_machine->pre_commit(14, set_entry_without_acl->get_buf()); + + ASSERT_TRUE(uncommitted_state.getNode(node_with_acl)->getData() == "notmodified"); + ASSERT_TRUE(uncommitted_state.getNode(node_without_acl)->getData() == "modified"); + + state_machine->rollback(14, 
set_entry_without_acl->get_buf()); + state_machine->rollback(13, set_entry_with_acl->get_buf()); + + // let's commit close and verify we get same outcome + state_machine->commit(12, close_entry->get_buf()); + + /// will be rejected because we don't have required auth + set_entry_with_acl = getLogEntryFromZKRequest(term, session_without_auth, state_machine->getNextZxid(), set_req_with_acl); + state_machine->pre_commit(13, set_entry_with_acl->get_buf()); + + /// will be accepted because no ACL + set_entry_without_acl = getLogEntryFromZKRequest(term, session_without_auth, state_machine->getNextZxid(), set_req_without_acl); + state_machine->pre_commit(14, set_entry_without_acl->get_buf()); + + ASSERT_TRUE(uncommitted_state.getNode(node_with_acl)->getData() == "notmodified"); + ASSERT_TRUE(uncommitted_state.getNode(node_without_acl)->getData() == "modified"); + + state_machine->commit(13, set_entry_with_acl->get_buf()); + state_machine->commit(14, set_entry_without_acl->get_buf()); + + ASSERT_TRUE(storage.container.find(node_with_acl)->value.getData() == "notmodified"); + ASSERT_TRUE(storage.container.find(node_without_acl)->value.getData() == "modified"); + + reset_node_value(node_without_acl); + } +} + TYPED_TEST(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted) { using namespace Coordination; @@ -2099,10 +2284,14 @@ TYPED_TEST(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitte ChangelogDirTest snapshots("./snapshots"); this->setSnapshotDirectory("./snapshots"); + ChangelogDirTest rocks("./rocksdb"); + this->setRocksDBDirectory("./rocksdb"); + ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared>(queue, snapshots_queue, this->keeper_context, nullptr); + using Storage = typename TestFixture::Storage; + auto state_machine = std::make_shared>(queue, snapshots_queue, this->keeper_context, nullptr); state_machine->init(); String user_auth_data = 
"test_user:test_password"; diff --git a/src/Core/BaseSettings.h b/src/Core/BaseSettings.h index adf7a41193c..6242d78aee7 100644 --- a/src/Core/BaseSettings.h +++ b/src/Core/BaseSettings.h @@ -108,6 +108,7 @@ public: public: const String & getName() const; Field getValue() const; + void setValue(const Field & value); Field getDefaultValue() const; String getValueString() const; String getDefaultValueString() const; @@ -122,10 +123,10 @@ public: private: friend class BaseSettings; - const BaseSettings * settings; + BaseSettings * settings; const typename Traits::Accessor * accessor; size_t index; - std::conditional_t custom_setting; + std::conditional_t custom_setting; }; enum SkipFlags @@ -144,35 +145,50 @@ public: Iterator & operator++(); Iterator operator++(int); /// NOLINT const SettingFieldRef & operator *() const { return field_ref; } + SettingFieldRef & operator *() { return field_ref; } bool operator ==(const Iterator & other) const; bool operator !=(const Iterator & other) const { return !(*this == other); } private: friend class BaseSettings; - Iterator(const BaseSettings & settings_, const typename Traits::Accessor & accessor_, SkipFlags skip_flags_); + Iterator(BaseSettings & settings_, const typename Traits::Accessor & accessor_, SkipFlags skip_flags_); void doSkip(); void setPointerToCustomSetting(); SettingFieldRef field_ref; - std::conditional_t custom_settings_iterator; + std::conditional_t custom_settings_iterator; SkipFlags skip_flags; }; class Range { public: - Range(const BaseSettings & settings_, SkipFlags skip_flags_) : settings(settings_), accessor(Traits::Accessor::instance()), skip_flags(skip_flags_) {} + Range(BaseSettings & settings_, SkipFlags skip_flags_) : settings(settings_), accessor(Traits::Accessor::instance()), skip_flags(skip_flags_) {} Iterator begin() const { return Iterator(settings, accessor, skip_flags); } Iterator end() const { return Iterator(settings, accessor, SKIP_ALL); } private: - const BaseSettings & settings; + 
BaseSettings & settings; const typename Traits::Accessor & accessor; SkipFlags skip_flags; }; - Range all(SkipFlags skip_flags = SKIP_NONE) const { return Range{*this, skip_flags}; } + class MutableRange + { + public: + MutableRange(BaseSettings & settings_, SkipFlags skip_flags_) : settings(settings_), accessor(Traits::Accessor::instance()), skip_flags(skip_flags_) {} + Iterator begin() { return Iterator(settings, accessor, skip_flags); } + Iterator end() { return Iterator(settings, accessor, SKIP_ALL); } + + private: + BaseSettings & settings; + const typename Traits::Accessor & accessor; + SkipFlags skip_flags; + }; + + Range all(SkipFlags skip_flags = SKIP_NONE) const { return Range{const_cast &>(*this), skip_flags}; } + MutableRange allMutable(SkipFlags skip_flags = SKIP_NONE) { return MutableRange{*this, skip_flags}; } Range allChanged() const { return all(SKIP_UNCHANGED); } Range allUnchanged() const { return all(SKIP_CHANGED); } Range allBuiltin() const { return all(SKIP_CUSTOM); } @@ -608,7 +624,7 @@ const SettingFieldCustom * BaseSettings::tryGetCustomSetting(std::strin } template -BaseSettings::Iterator::Iterator(const BaseSettings & settings_, const typename Traits::Accessor & accessor_, SkipFlags skip_flags_) +BaseSettings::Iterator::Iterator(BaseSettings & settings_, const typename Traits::Accessor & accessor_, SkipFlags skip_flags_) : skip_flags(skip_flags_) { field_ref.settings = &settings_; @@ -741,6 +757,22 @@ Field BaseSettings::SettingFieldRef::getValue() const return accessor->getValue(*settings, index); } +/// Assigns `value` to the referenced setting: to the custom setting entry when this reference points at one, otherwise to the built-in setting through the accessor. +template +void BaseSettings::SettingFieldRef::setValue(const Field & value) +{ + if constexpr (Traits::allow_custom_settings) + { + if (custom_setting) + { + custom_setting->second = value; + return; + } + } + /// Also reached when custom settings are allowed but `custom_setting` is null; previously that case silently dropped the value. + accessor->setValue(*settings, index, value); +} + template Field BaseSettings::SettingFieldRef::getDefaultValue() const { diff --git a/src/Core/Defines.h b/src/Core/Defines.h index b7675b55b87..6df335a9c8f 100644 --- a/src/Core/Defines.h +++
b/src/Core/Defines.h @@ -90,13 +90,13 @@ static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_POLICY = "SLRU"; static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE = 0_MiB; static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO = 0.5l; static constexpr auto DEFAULT_MARK_CACHE_POLICY = "SLRU"; -static constexpr auto DEFAULT_MARK_CACHE_MAX_SIZE = 5368_MiB; +static constexpr auto DEFAULT_MARK_CACHE_MAX_SIZE = 5_GiB; static constexpr auto DEFAULT_MARK_CACHE_SIZE_RATIO = 0.5l; static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY = "SLRU"; static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE = 0; static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO = 0.5; static constexpr auto DEFAULT_INDEX_MARK_CACHE_POLICY = "SLRU"; -static constexpr auto DEFAULT_INDEX_MARK_CACHE_MAX_SIZE = 5368_MiB; +static constexpr auto DEFAULT_INDEX_MARK_CACHE_MAX_SIZE = 5_GiB; static constexpr auto DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO = 0.3; static constexpr auto DEFAULT_MMAP_CACHE_MAX_SIZE = 1_KiB; /// chosen by rolling dice static constexpr auto DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE = 128_MiB; diff --git a/src/Core/Field.h b/src/Core/Field.h index a78b589c883..f1bb4a72b0d 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -1038,7 +1038,7 @@ struct fmt::formatter } template - auto format(const DB::Field & x, FormatContext & ctx) + auto format(const DB::Field & x, FormatContext & ctx) const { return fmt::format_to(ctx.out(), "{}", toString(x)); } diff --git a/src/Core/NamesAndTypes.cpp b/src/Core/NamesAndTypes.cpp index d6380a632f1..49ab822c738 100644 --- a/src/Core/NamesAndTypes.cpp +++ b/src/Core/NamesAndTypes.cpp @@ -188,6 +188,18 @@ NamesAndTypesList NamesAndTypesList::filter(const Names & names) const return filter(NameSet(names.begin(), names.end())); } +NamesAndTypesList NamesAndTypesList::eraseNames(const NameSet & names) const +{ + NamesAndTypesList res; + for (const auto & column : *this) + { + if (!names.contains(column.name)) + 
res.push_back(column); + } + return res; +} + + NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const { /// NOTE: It's better to make a map in `IStorage` than to create it here every time again. diff --git a/src/Core/NamesAndTypes.h b/src/Core/NamesAndTypes.h index 915add9b7bc..29f40c45938 100644 --- a/src/Core/NamesAndTypes.h +++ b/src/Core/NamesAndTypes.h @@ -111,6 +111,9 @@ public: /// Leave only the columns whose names are in the `names`. In `names` there can be superfluous columns. NamesAndTypesList filter(const Names & names) const; + /// Leave only the columns whose names are not in the `names`. + NamesAndTypesList eraseNames(const NameSet & names) const; + /// Unlike `filter`, returns columns in the order in which they go in `names`. NamesAndTypesList addTypes(const Names & names) const; diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index 3fc9e089451..4c0848c0706 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -63,6 +63,9 @@ const char USER_INTERSERVER_MARKER[] = " INTERSERVER SECRET "; /// Marker for SSH-keys-based authentication (passed as the user name) const char SSH_KEY_AUTHENTICAION_MARKER[] = " SSH KEY AUTHENTICATION "; +/// Marker for JSON Web Token authentication +const char JWT_AUTHENTICAION_MARKER[] = " JWT AUTHENTICATION "; + }; namespace Protocol diff --git a/src/Core/QualifiedTableName.h b/src/Core/QualifiedTableName.h index bf05bd59caf..0fd72c32a54 100644 --- a/src/Core/QualifiedTableName.h +++ b/src/Core/QualifiedTableName.h @@ -125,7 +125,7 @@ namespace fmt } template - auto format(const DB::QualifiedTableName & name, FormatContext & ctx) + auto format(const DB::QualifiedTableName & name, FormatContext & ctx) const { return fmt::format_to(ctx.out(), "{}.{}", DB::backQuoteIfNeed(name.database), DB::backQuoteIfNeed(name.table)); } diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 45f235116ab..68ac45fa24f 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@
-14,6 +14,7 @@ class AbstractConfiguration; namespace DB { +// clang-format off #define SERVER_SETTINGS(M, ALIAS) \ M(Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0) \ M(Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0) \ @@ -85,10 +86,12 @@ namespace DB M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0) \ M(UInt64, page_cache_chunk_size, 2 << 20, "Bytes per chunk in userspace page cache. Rounded up to a multiple of page size (typically 4 KiB) or huge page size (typically 2 MiB, only if page_cache_use_thp is enabled).", 0) \ M(UInt64, page_cache_mmap_size, 1 << 30, "Bytes per memory mapping in userspace page cache. Not important.", 0) \ - M(UInt64, page_cache_size, 10ul << 30, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \ + M(UInt64, page_cache_size, 0, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \ M(Bool, page_cache_use_madv_free, DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE, "If true, the userspace page cache will allow the OS to automatically reclaim memory from the cache on memory pressure (using MADV_FREE).", 0) \ M(Bool, page_cache_use_transparent_huge_pages, true, "Userspace will attempt to use transparent huge pages on Linux. 
This is best-effort.", 0) \ M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \ + M(UInt64, compiled_expression_cache_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE, "Byte size of compiled expressions cache.", 0) \ + M(UInt64, compiled_expression_cache_elements_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES, "Maximum entries in compiled expressions cache.", 0) \ \ M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \ M(UInt64, dns_cache_max_entries, 10000, "Internal DNS cache max entries.", 0) \ @@ -97,11 +100,13 @@ namespace DB \ M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ - M(UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0) \ - M(UInt64, max_view_num_to_warn, 10000lu, "If number of views is greater than this value, server will create a warning that will displayed to user.", 0) \ - M(UInt64, max_dictionary_num_to_warn, 1000lu, "If number of dictionaries is greater than this value, server will create a warning that will displayed to user.", 0) \ - M(UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \ - M(UInt64, max_part_num_to_warn, 100000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \ + M(UInt64, max_table_num_to_warn, 5000lu, "If the number of tables is greater than this value, the server will create a warning that will displayed to user.", 0) \ + M(UInt64, max_view_num_to_warn, 10000lu, "If the number of views is greater than this value, the server will create a warning 
that will displayed to user.", 0) \ + M(UInt64, max_dictionary_num_to_warn, 1000lu, "If the number of dictionaries is greater than this value, the server will create a warning that will displayed to user.", 0) \ + M(UInt64, max_database_num_to_warn, 1000lu, "If the number of databases is greater than this value, the server will create a warning that will displayed to user.", 0) \ + M(UInt64, max_part_num_to_warn, 100000lu, "If the number of parts is greater than this value, the server will create a warning that will displayed to user.", 0) \ + M(UInt64, max_table_num_to_throw, 0lu, "If number of tables is greater than this value, server will throw an exception. 0 means no limitation. View, remote tables, dictionary, system tables are not counted. Only count table in Atomic/Ordinary/Replicated/Lazy database engine.", 0) \ + M(UInt64, max_database_num_to_throw, 0lu, "If number of databases is greater than this value, server will throw an exception. 0 means no limitation.", 0) \ M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \ M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \ \ @@ -146,6 +151,10 @@ namespace DB M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. 
Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ + M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ + M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ + M(Double, gwp_asan_force_sample_probability, 0, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ + M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/Core/ServerUUID.cpp b/src/Core/ServerUUID.cpp index c2de6be7794..251b407e673 100644 --- a/src/Core/ServerUUID.cpp +++ b/src/Core/ServerUUID.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -11,6 +12,16 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_CREATE_FILE; + extern const int LOGICAL_ERROR; +} + +UUID ServerUUID::get() +{ + if (server_uuid == UUIDHelpers::Nil && + (Context::getGlobalContextInstance()->getApplicationType() == Context::ApplicationType::SERVER || + Context::getGlobalContextInstance()->getApplicationType() == Context::ApplicationType::KEEPER)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ServerUUID is not initialized yet"); + return server_uuid; } void ServerUUID::load(const fs::path & server_uuid_file, Poco::Logger * log) @@ -57,4 +68,9 @@ UUID loadServerUUID(const fs::path & server_uuid_file, Poco::Logger * log) } } +void ServerUUID::setRandomForUnitTests() +{ + server_uuid = UUIDHelpers::generateV4(); +} + } diff --git a/src/Core/ServerUUID.h 
b/src/Core/ServerUUID.h index 71ae9edc00e..9c7f7d32acc 100644 --- a/src/Core/ServerUUID.h +++ b/src/Core/ServerUUID.h @@ -15,10 +15,12 @@ class ServerUUID public: /// Returns persistent UUID of current clickhouse-server or clickhouse-keeper instance. - static UUID get() { return server_uuid; } + static UUID get(); /// Loads server UUID from file or creates new one. Should be called on daemon startup. static void load(const fs::path & server_uuid_file, Poco::Logger * log); + + static void setRandomForUnitTests(); }; UUID loadServerUUID(const fs::path & server_uuid_file, Poco::Logger * log); diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 8257b94cd9f..9c9c9c1db00 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -142,6 +142,7 @@ void Settings::applyCompatibilitySetting(const String & compatibility_value) return; ClickHouseVersion version(compatibility_value); + const auto & settings_changes_history = getSettingsChangesHistory(); /// Iterate through ClickHouse version in descending order and apply reversed /// changes for each version that is higher that version from compatibility setting for (auto it = settings_changes_history.rbegin(); it != settings_changes_history.rend(); ++it) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f4de7ee86fb..d84e5b149f6 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -30,6 +31,7 @@ class IColumn; * for tracking settings changes in different versions and for special `compatibility` setting to work correctly. 
*/ +// clang-format off #define COMMON_SETTINGS(M, ALIAS) \ M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ @@ -78,34 +80,36 @@ class IColumn; M(UInt64, idle_connection_timeout, 3600, "Close idle TCP connections after specified number of seconds.", 0) \ M(UInt64, distributed_connections_pool_size, 1024, "Maximum number of connections with one remote server in the pool.", 0) \ M(UInt64, connections_with_failover_max_tries, 3, "The maximum number of attempts to connect to replicas.", 0) \ - M(UInt64, s3_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to S3 (some implementations does not supports variable size parts).", 0) \ + M(UInt64, s3_strict_upload_part_size, S3::DEFAULT_STRICT_UPLOAD_PART_SIZE, "The exact size of part to upload during multipart upload to S3 (some implementations does not supports variable size parts).", 0) \ M(UInt64, azure_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to Azure blob storage.", 0) \ M(UInt64, azure_max_blocks_in_multipart_upload, 50000, "Maximum number of blocks in multipart upload for Azure.", 0) \ - M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \ - M(UInt64, s3_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to S3.", 0) \ + M(UInt64, s3_min_upload_part_size, S3::DEFAULT_MIN_UPLOAD_PART_SIZE, "The minimum size of part to upload during multipart upload to S3.", 0) \ + M(UInt64, s3_max_upload_part_size, S3::DEFAULT_MAX_UPLOAD_PART_SIZE, "The maximum size of part to upload during multipart upload to S3.", 0) \ M(UInt64, azure_min_upload_part_size, 16*1024*1024, 
"The minimum size of part to upload during multipart upload to Azure blob storage.", 0) \ M(UInt64, azure_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage.", 0) \ - M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \ - M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \ + M(UInt64, s3_upload_part_size_multiply_factor, S3::DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \ + M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, S3::DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \ + M(UInt64, s3_max_part_number, S3::DEFAULT_MAX_PART_NUMBER, "Maximum part number number for s3 upload part.", 0) \ + M(UInt64, s3_max_single_operation_copy_size, S3::DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE, "Maximum size for a single copy operation in s3", 0) \ M(UInt64, azure_upload_part_size_multiply_factor, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage.", 0) \ M(UInt64, azure_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor.", 0) \ - M(UInt64, s3_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 
0 means unlimited.", 0) \ + M(UInt64, s3_max_inflight_parts_for_one_file, S3::DEFAULT_MAX_INFLIGHT_PARTS_FOR_ONE_FILE, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited.", 0) \ M(UInt64, azure_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited.", 0) \ - M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \ + M(UInt64, s3_max_single_part_upload_size, S3::DEFAULT_MAX_SINGLE_PART_UPLOAD_SIZE, "The maximum size of object to upload using singlepart upload to S3.", 0) \ M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \ M(UInt64, azure_max_single_part_copy_size, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage.", 0) \ - M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \ + M(UInt64, s3_max_single_read_retries, S3::DEFAULT_MAX_SINGLE_READ_TRIES, "The maximum number of retries during single S3 read.", 0) \ M(UInt64, azure_max_single_read_retries, 4, "The maximum number of retries during single Azure blob storage read.", 0) \ M(UInt64, azure_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write", 0) \ - M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \ - M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ - M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ + M(UInt64, s3_max_unexpected_write_error_retries, S3::DEFAULT_MAX_UNEXPECTED_WRITE_ERROR_RETRIES, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \ + M(UInt64, 
s3_max_redirects, S3::DEFAULT_MAX_REDIRECTS, "Max number of S3 redirects hops allowed.", 0) \ + M(UInt64, s3_max_connections, S3::DEFAULT_MAX_CONNECTIONS, "The maximum number of connections per server.", 0) \ M(UInt64, s3_max_get_rps, 0, "Limit on S3 GET request per second rate before throttling. Zero means unlimited.", 0) \ M(UInt64, s3_max_get_burst, 0, "Max number of requests that can be issued simultaneously before hitting request per second limit. By default (0) equals to `s3_max_get_rps`", 0) \ M(UInt64, s3_max_put_rps, 0, "Limit on S3 PUT request per second rate before throttling. Zero means unlimited.", 0) \ M(UInt64, s3_max_put_burst, 0, "Max number of requests that can be issued simultaneously before hitting request per second limit. By default (0) equals to `s3_max_put_rps`", 0) \ - M(UInt64, s3_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ - M(Bool, s3_use_adaptive_timeouts, true, "When adaptive timeouts are enabled first two attempts are made with low receive and send timeout", 0) \ + M(UInt64, s3_list_object_keys_size, S3::DEFAULT_LIST_OBJECT_KEYS_SIZE, "Maximum number of files that could be returned in batch by ListObject request", 0) \ + M(Bool, s3_use_adaptive_timeouts, S3::DEFAULT_USE_ADAPTIVE_TIMEOUTS, "When adaptive timeouts are enabled first two attempts are made with low receive and send timeout", 0) \ M(UInt64, azure_list_object_keys_size, 1000, "Maximum number of files that could be returned in batch by ListObject request", 0) \ M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \ @@ -122,10 +126,10 @@ class IColumn; M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ M(Bool, 
azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ - M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \ - M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ - M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \ - M(UInt64, s3_connect_timeout_ms, 1000, "Connection timeout for host from s3 disks.", 0) \ + M(Bool, s3_disable_checksum, S3::DEFAULT_DISABLE_CHECKSUM, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \ + M(UInt64, s3_retry_attempts, S3::DEFAULT_RETRY_ATTEMPTS, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ + M(UInt64, s3_request_timeout_ms, S3::DEFAULT_REQUEST_TIMEOUT_MS, "Idleness timeout for sending and receiving data to/from S3. 
Fail if a single TCP read or write call blocks for this long.", 0) \ + M(UInt64, s3_connect_timeout_ms, S3::DEFAULT_CONNECT_TIMEOUT_MS, "Connection timeout for host from s3 disks.", 0) \ M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \ M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \ M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log. The value can be overwritten per table with table settings", 0) \ @@ -160,8 +164,8 @@ class IColumn; M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \ M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \ \ - M(Bool, allow_statistic_optimize, false, "Allows using statistic to optimize queries", 0) \ - M(Bool, allow_experimental_statistic, false, "Allows using statistic", 0) \ + M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \ + M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \ \ M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \ M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. 
Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \ @@ -202,6 +206,8 @@ class IColumn; M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \ M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \ + M(UInt64, parallel_replicas_custom_key_range_lower, 0, "Lower bound for the universe that the parallel replicas custom range filter is calculated over", 0) \ + M(UInt64, parallel_replicas_custom_key_range_upper, 0, "Upper bound for the universe that the parallel replicas custom range filter is calculated over. A value of 0 disables the upper bound, setting it to the max value of the custom key expression", 0) \ \ M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \ M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \ @@ -332,7 +338,7 @@ class IColumn; M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). 
Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ \ M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \ - M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", IMPORTANT) \ + M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \ \ M(JoinStrictness, join_default_strictness, JoinStrictness::All, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.", 0) \ M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. 
ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \ @@ -372,7 +378,7 @@ class IColumn; M(UInt64, http_max_fields, 1000000, "Maximum number of fields in HTTP header", 0) \ M(UInt64, http_max_field_name_size, 128 * 1024, "Maximum length of field name in HTTP header", 0) \ M(UInt64, http_max_field_value_size, 128 * 1024, "Maximum length of field value in HTTP header", 0) \ - M(Bool, http_skip_not_found_url_for_globs, true, "Skip url's for globs with HTTP_NOT_FOUND error", 0) \ + M(Bool, http_skip_not_found_url_for_globs, true, "Skip URLs for globs with HTTP_NOT_FOUND error", 0) \ M(Bool, http_make_head_request, true, "Allows the execution of a `HEAD` request while reading data from HTTP to retrieve information about the file to be read, such as its size", 0) \ M(Bool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown", 0) \ M(Bool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.", 0) \ @@ -396,7 +402,7 @@ class IColumn; M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \ M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ - M(Bool, enable_vertical_final, false, "Not recommended. 
If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ + M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ \ \ /** Limits during query execution are part of the settings. \ @@ -464,7 +470,7 @@ class IColumn; M(UInt64, max_rows_in_join, 0, "Maximum size of the hash table for JOIN (in number of rows).", 0) \ M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \ M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \ - M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \ + M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key. Can be applied only to hash join and storage join.", IMPORTANT) \ M(JoinAlgorithm, join_algorithm, JoinAlgorithm::DEFAULT, "Specify join algorithm.", 0) \ M(UInt64, cross_join_min_rows_to_compress, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.", 0) \ M(UInt64, cross_join_min_bytes_to_compress, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. 
This block is compressed when any of the two thresholds (by rows or by bytes) are reached.", 0) \ @@ -651,7 +657,7 @@ class IColumn; M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \ M(Bool, system_events_show_zero_values, false, "When querying system.events or system.metrics tables, include all metrics, even with zero values.", 0) \ M(MySQLDataTypesSupport, mysql_datatypes_support_level, MySQLDataTypesSupportList{}, "Defines how MySQL types are converted to corresponding ClickHouse types. A comma separated list in any combination of 'decimal', 'datetime64', 'date2Date32' or 'date2String'. decimal: convert NUMERIC and DECIMAL types to Decimal when precision allows it. datetime64: convert DATETIME and TIMESTAMP types to DateTime64 instead of DateTime when precision is not 0. date2Date32: convert DATE to Date32 instead of Date. Takes precedence over date2String. date2String: convert DATE to String instead of Date. Overridden by datetime64.", 0) \ - M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \ + M(Bool, optimize_trivial_insert_select, false, "Optimize trivial 'INSERT INTO table SELECT ... 
FROM TABLES' query", 0) \ M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \ M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \ M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \ @@ -718,8 +724,8 @@ class IColumn; M(Bool, engine_file_truncate_on_insert, false, "Enables or disables truncate before insert in file engine tables", 0) \ M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \ M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \ - M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \ - M(Bool, enable_url_encoding, true, " Allows to enable/disable decoding/encoding path in uri in URL table engine", 0) \ + M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in the URL table engine", 0) \ + M(Bool, enable_url_encoding, true, " Allows to enable/disable decoding/encoding path in URI in the URL table engine", 0) \ M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \ M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). 
Not recommended to enable these settings.", 0) \ M(UInt64, max_distributed_depth, 5, "Maximum distributed query depth", 0) \ @@ -851,7 +857,7 @@ class IColumn; M(Bool, schema_inference_use_cache_for_azure, true, "Use cache in schema inference while using azure table function", 0) \ M(Bool, schema_inference_use_cache_for_hdfs, true, "Use cache in schema inference while using hdfs table function", 0) \ M(Bool, schema_inference_use_cache_for_url, true, "Use cache in schema inference while using url table function", 0) \ - M(Bool, schema_inference_cache_require_modification_time_for_url, true, "Use schema from cache for URL with last modification time validation (for urls with Last-Modified header)", 0) \ + M(Bool, schema_inference_cache_require_modification_time_for_url, true, "Use schema from cache for URL with last modification time validation (for URLs with Last-Modified header)", 0) \ \ M(String, compatibility, "", "Changes other settings according to provided ClickHouse version. If we know that we changed some behaviour in ClickHouse by changing some settings in some version, this compatibility setting will control these settings", 0) \ \ @@ -891,6 +897,7 @@ class IColumn; M(Bool, geo_distance_returns_float64_on_float64_arguments, true, "If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32.", 0) \ M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. 
Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \ M(Bool, cast_string_to_dynamic_use_inference, false, "Use types inference during String to Dynamic conversion", 0) \ + M(Bool, enable_blob_storage_log, true, "Write information about blob storage operations to system.blob_storage_log table", 0) \ \ /** Experimental functions */ \ M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ @@ -927,6 +934,7 @@ class IColumn; M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.", 0) \ M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. 
Note that it can lead to incorrect result", 0) \ M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \ + M(Bool, allow_deprecated_snowflake_conversion_functions, false, "Enables deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. @@ -1006,6 +1014,8 @@ class IColumn; M(Char, format_csv_delimiter, ',', "The character to be considered as a delimiter in CSV data. If setting with a string, a string has to have a length of 1.", 0) \ M(Bool, format_csv_allow_single_quotes, false, "If it is set to true, allow strings in single quotes.", 0) \ M(Bool, format_csv_allow_double_quotes, true, "If it is set to true, allow strings in double quotes.", 0) \ + M(Bool, output_format_csv_serialize_tuple_into_separate_columns, true, "If it set to true, then Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost)", 0) \ + M(Bool, input_format_csv_deserialize_separate_columns_into_tuple, true, "If it set to true, then separate columns written in CSV format can be deserialized to Tuple column.", 0) \ M(Bool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \ M(Bool, input_format_csv_allow_cr_end_of_line, false, "If it is set true, \\r will be allowed at end of line not followed by \\n", 0) \ M(Bool, input_format_csv_enum_as_number, false, "Treat inserted enum values in CSV formats as enum indices", 0) \ @@ -1044,6 +1054,7 @@ class IColumn; M(UInt64, input_format_max_bytes_to_read_for_schema_inference, 32 * 1024 * 1024, "The maximum bytes of data to read for automatic schema 
inference", 0) \ M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \ M(Bool, input_format_csv_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference in CSV format", 0) \ + M(Bool, input_format_csv_try_infer_strings_from_quoted_tuples, true, "Interpret quoted tuples in the input data as a value of type String.", 0) \ M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \ M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \ M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \ @@ -1056,7 +1067,8 @@ class IColumn; M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \ M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \ M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \ - M(UInt64, input_format_parquet_max_block_size, 8192, "Max block size for parquet reader.", 0) \ + M(UInt64, input_format_parquet_max_block_size, DEFAULT_BLOCK_SIZE, "Max block size for parquet reader.", 0) \ + M(UInt64, input_format_parquet_prefer_block_bytes, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader", 0) \ M(Bool, input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format Protobuf", 0) \ M(Bool, input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for 
format CapnProto", 0) \ M(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format ORC", 0) \ @@ -1080,6 +1092,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ + M(Bool, input_format_json_ignore_key_case, false, "Ignore json key case while read json field from string", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ @@ -1134,7 +1147,9 @@ class IColumn; M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \ M(UInt64, output_format_pretty_max_value_width_apply_for_single_value, false, "Only cut values (see the `output_format_pretty_max_value_width` setting) when it is not a single value in a block. Otherwise output it entirely, which is useful for the `SHOW CREATE TABLE` query.", 0) \ M(UInt64Auto, output_format_pretty_color, "auto", "Use ANSI escape sequences in Pretty formats. 
0 - disabled, 1 - enabled, 'auto' - enabled if a terminal.", 0) \ - M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \ + M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \ + M(UInt64, output_format_pretty_display_footer_column_names, true, "Display column names in the footer if there are 999 or more rows.", 0) \ + M(UInt64, output_format_pretty_display_footer_column_names_min_rows, 50, "Sets the minimum threshold value of rows for which to enable displaying column names in the footer. 50 (default)", 0) \ M(UInt64, output_format_parquet_row_group_size, 1000000, "Target row group size in rows.", 0) \ M(UInt64, output_format_parquet_row_group_size_bytes, 512 * 1024 * 1024, "Target row group size in bytes, before compression.", 0) \ M(Bool, output_format_parquet_string_as_string, true, "Use Parquet String type instead of Binary for String columns.", 0) \ @@ -1146,6 +1161,7 @@ class IColumn; M(Bool, output_format_parquet_parallel_encoding, true, "Do Parquet encoding in multiple threads. Requires output_format_parquet_use_custom_encoder.", 0) \ M(UInt64, output_format_parquet_data_page_size, 1024 * 1024, "Target page size in bytes, before compression.", 0) \ M(UInt64, output_format_parquet_batch_size, 1024, "Check page size every this many rows. Consider decreasing if you have columns with average values size above a few KBs.", 0) \ + M(Bool, output_format_parquet_write_page_index, true, "Add a possibility to write page index into parquet files.", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. 
Possible values: 'null', 'deflate', 'snappy', 'zstd'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp new file mode 100644 index 00000000000..7719fe1e837 --- /dev/null +++ b/src/Core/SettingsChangesHistory.cpp @@ -0,0 +1,324 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + +ClickHouseVersion::ClickHouseVersion(const String & version) +{ + Strings split; + boost::split(split, version, [](char c){ return c == '.'; }); + components.reserve(split.size()); + if (split.empty()) + throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse version here: {}", version}; + + for (const auto & split_element : split) + { + size_t component; + ReadBufferFromString buf(split_element); + if (!tryReadIntText(component, buf) || !buf.eof()) + throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse version here: {}", version}; + components.push_back(component); + } +} + +ClickHouseVersion::ClickHouseVersion(const char * version) + : ClickHouseVersion(String(version)) +{ +} + +String ClickHouseVersion::toString() const +{ + String version = std::to_string(components[0]); + for (size_t i = 1; i < components.size(); ++i) + version += "." + std::to_string(components[i]); + + return version; +} + +// clang-format off +/// History of settings changes that controls some backward incompatible changes +/// across all ClickHouse versions. It maps ClickHouse version to settings changes that were done +/// in this version. This history contains both changes to existing settings and newly added settings. 
+/// Settings changes is a vector of structs +/// {setting_name, previous_value, new_value, reason}. +/// For newly added setting choose the most appropriate previous_value (for example, if new setting +/// controls new feature and it's 'true' by default, use 'false' as previous_value). +/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) +/// Note: please check if the key already exists to prevent duplicate entries. +static std::initializer_list> settings_changes_history_initializer = +{ + {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."}, + {"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."}, + }}, + {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, + {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, + {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, + {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, + {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, + {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, + {"allow_experimental_full_text_index", false, false, "Enable experimental full-text index"}, + {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, + 
{"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, + {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, + {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, + {"s3_max_part_number", 10000, 10000, "Maximum part number number for s3 upload part"}, + {"s3_max_single_operation_copy_size", 32 * 1024 * 1024, 32 * 1024 * 1024, "Maximum size for a single copy operation in s3"}, + {"input_format_parquet_max_block_size", 8192, DEFAULT_BLOCK_SIZE, "Increase block size for parquet reader."}, + {"input_format_parquet_prefer_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader."}, + {"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"}, + {"allow_deprecated_snowflake_conversion_functions", true, false, "Disabled deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake."}, + {"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."}, + {"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."}, + {"allow_statistics_optimize", false, false, "The setting was renamed. The previous name is `allow_statistic_optimize`."}, + {"allow_experimental_statistics", false, false, "The setting was renamed. 
The previous name is `allow_experimental_statistic`."}, + {"enable_vertical_final", false, true, "Enable vertical final by default again after fixing bug"}, + {"parallel_replicas_custom_key_range_lower", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards"}, + {"parallel_replicas_custom_key_range_upper", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards. A value of 0 disables the upper limit"}, + {"output_format_pretty_display_footer_column_names", 0, 1, "Add a setting to display column names in the footer if there are many rows. Threshold value is controlled by output_format_pretty_display_footer_column_names_min_rows."}, + {"output_format_pretty_display_footer_column_names_min_rows", 0, 50, "Add a setting to control the threshold value for setting output_format_pretty_display_footer_column_names_min_rows. Default 50."}, + {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, + {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, + {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, + }}, + {"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"}, + {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. 
t1.y < t2.y."}, + {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, + {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, + {"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, + {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, + {"http_max_chunk_size", 0, 0, "Internal limitation"}, + {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, + {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, + {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, + {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, + {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, + }}, + {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, + {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, + {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, + {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, + {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries 
against system tables"}, + {"input_format_json_ignore_unnecessary_fields", false, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields"}, + {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, + {"allow_experimental_database_replicated", false, true, "Database engine Replicated is now in Beta stage"}, + {"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"}, + {"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"}, + {"azure_allow_parallel_part_upload", "true", "true", "Use multiple threads for azure multipart upload."}, + {"max_recursive_cte_evaluation_depth", DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, "Maximum limit on recursive CTE evaluation depth"}, + {"query_plan_convert_outer_join_to_inner_join", false, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values"}, + }}, + {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, + {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, + {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, + {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, + {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, + {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"}, + 
{"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, + {"traverse_shadow_remote_data_paths", false, false, "Traverse shadow directory when query system.remote_data_paths."}, + {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication is dependent materialized view cannot work together with async inserts."}, + {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, + {"log_processors_profiles", false, true, "Enable by default"}, + {"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."}, + {"allow_suspicious_primary_key", true, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. SimpleAggregateFunction)"}, + {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"}, + {"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"}, + {"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"}, + {"distributed_insert_skip_read_only_replicas", false, false, "If true, INSERT into Distributed will skip read-only replicas"}, + {"keeper_max_retries", 10, 10, "Max retries for general keeper operations"}, + {"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"}, + {"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"}, + {"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. 
It is experimental because it will be rewritten"}, + {"allow_experimental_analyzer", false, true, "Enable analyzer and planner by default."}, + {"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."}, + {"allow_get_client_http_header", false, false, "Introduced a new function."}, + {"output_format_pretty_row_numbers", false, true, "It is better for usability."}, + {"output_format_pretty_max_value_width_apply_for_single_value", true, false, "Single values in Pretty formats won't be cut."}, + {"output_format_parquet_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, + {"output_format_orc_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, + {"output_format_arrow_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. 
While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, + {"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, + {"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, + {"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."}, + {"geo_distance_returns_float64_on_float64_arguments", false, true, "Increase the default precision."}, + {"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 
0 means unlimited."}, + {"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."}, + {"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."}, + {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, + {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, + {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, + {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, + {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, + {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, + }}, + {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, + {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, + {"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"}, + {"output_format_pretty_single_large_number_tip_threshold", 0, 1'000'000, "Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0)"}, + {"input_format_try_infer_exponent_floats", true, false, 
"Don't infer floats in exponential notation by default"}, + {"query_plan_optimize_prewhere", true, true, "Allow to push down filter to PREWHERE expression for supported storages"}, + {"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."}, + {"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"}, + {"async_insert_use_adaptive_busy_timeout", false, true, "Use adaptive asynchronous insert timeout"}, + {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"}, + {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"}, + {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"}, + {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}, + {"format_template_row_format", "", "", "Template row format string can be set directly in query"}, + {"format_template_resultset_format", "", "", "Template result set format string can be set in query"}, + {"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, + {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}, + {"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."}, + {"min_external_table_block_size_rows", DEFAULT_INSERT_BLOCK_SIZE, DEFAULT_INSERT_BLOCK_SIZE, 
"Squash blocks passed to external table to specified size in rows, if blocks are not big enough"}, + {"min_external_table_block_size_bytes", DEFAULT_INSERT_BLOCK_SIZE * 256, DEFAULT_INSERT_BLOCK_SIZE * 256, "Squash blocks passed to external table to specified size in bytes, if blocks are not big enough."}, + {"parallel_replicas_prefer_local_join", true, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN."}, + {"optimize_time_filter_with_preimage", true, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')"}, + {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."}, + {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, + {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, + {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, + {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + }}, + {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, + {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, + {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by 
default as it's recommended"}, + {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, + {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, + {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, + {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, + {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, + {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, + {"enable_vertical_final", false, true, "Use vertical final by default"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, + {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, + {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, + {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, + {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, + {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, + 
{"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, + {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, + {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, + {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, + {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, + {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, + {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, + {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, + {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, + {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, + {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, + {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, + {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, + {"input_format_json_try_infer_numbers_from_strings", 
true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, + {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, + {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, + {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, + {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, + {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, + {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, + {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, + {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. 
Rows with different values in sorting prefix are filled independently"}, + {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, + {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, + {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, + {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, + {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, + {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, + {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, + {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, + {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, + {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, + {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, + {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, + {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, + {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, + {"23.2", 
{{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, + {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, + {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, + {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, + {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, + {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, + {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, + {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, + {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, + {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, + {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. 
For example, sorting steps related to ORDER BY clauses in subqueries"}}}, + {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, + {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, + {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, + {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, + {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, + {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, + {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, + {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, + {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, + {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, + {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, + {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, + {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, + {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, + {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by 
default"}}}, + {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, + {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, + {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, + {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, + {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, + {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, + {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, + {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, + {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. 
It is significantly more convenient to use than sequential quorum inserts"}, + {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, + {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, + {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, + {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, + {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, + {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, + {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, + {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, + {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, + {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, + {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, + {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, + {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, +}; + + 
+const std::map & getSettingsChangesHistory() +{ + static std::map settings_changes_history; + + static std::once_flag initialized_flag; + std::call_once(initialized_flag, []() + { + for (const auto & setting_change : settings_changes_history_initializer) + { + /// Disallow duplicate keys in the settings changes history. Example: + /// {"21.2", {{"some_setting_1", false, true, "[...]"}}}, + /// [...] + /// {"21.2", {{"some_setting_2", false, true, "[...]"}}}, + /// std::map keys are unique, so without this check the later entry would silently overwrite the earlier one. + if (settings_changes_history.contains(setting_change.first)) + throw Exception{ErrorCodes::LOGICAL_ERROR, "Detected duplicate version '{}'", setting_change.first.toString()}; + + settings_changes_history[setting_change.first] = setting_change.second; + } + }); + + return settings_changes_history; +} +} diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 3ddacd06a06..b1a69c3b6d6 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -1,62 +1,25 @@ #pragma once #include -#include -#include -#include -#include #include +#include namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class ClickHouseVersion { public: - ClickHouseVersion(const String & version) /// NOLINT(google-explicit-constructor) - { - Strings split; - boost::split(split, version, [](char c){ return c == '.'; }); - components.reserve(split.size()); - if (split.empty()) - throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse version here: {}", version}; + /// NOLINTBEGIN(google-explicit-constructor) + ClickHouseVersion(const String & version); + ClickHouseVersion(const char * version); + /// NOLINTEND(google-explicit-constructor) - for (const auto & split_element : split) - { - size_t component; - ReadBufferFromString buf(split_element); - if (!tryReadIntText(component, buf) || !buf.eof()) - throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse 
version here: {}", version}; - components.push_back(component); - } - } + String toString() const; - ClickHouseVersion(const char * version) : ClickHouseVersion(String(version)) {} /// NOLINT(google-explicit-constructor) - - String toString() const - { - String version = std::to_string(components[0]); - for (size_t i = 1; i < components.size(); ++i) - version += "." + std::to_string(components[i]); - - return version; - } - - bool operator<(const ClickHouseVersion & other) const - { - return components < other.components; - } - - bool operator>=(const ClickHouseVersion & other) const - { - return components >= other.components; - } + bool operator<(const ClickHouseVersion & other) const { return components < other.components; } + bool operator>=(const ClickHouseVersion & other) const { return components >= other.components; } private: std::vector components; @@ -75,229 +38,6 @@ namespace SettingsChangesHistory using SettingsChanges = std::vector; } -/// History of settings changes that controls some backward incompatible changes -/// across all ClickHouse versions. It maps ClickHouse version to settings changes that were done -/// in this version. This history contains both changes to existing settings and newly added settings. -/// Settings changes is a vector of structs -/// {setting_name, previous_value, new_value, reason}. -/// For newly added setting choose the most appropriate previous_value (for example, if new setting -/// controls new feature and it's 'true' by default, use 'false' as previous_value). 
-/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) -static std::map settings_changes_history = -{ - {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, - {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, - {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, - {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, - {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, - {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, - {"allow_experimental_full_text_index", false, false, "Enable experimental full-text index"}, - {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, - {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, - {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, - {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, - }}, - {"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"}, - 
{"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, - {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, - {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, - {"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, - {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, - {"http_max_chunk_size", 0, 0, "Internal limitation"}, - {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, - {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, - {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, - {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, - {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, - }}, - {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, - {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, - {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, - {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only 
execution of lightweight deletes"}, - {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, - {"input_format_json_ignore_unnecessary_fields", false, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields"}, - {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, - {"allow_experimental_database_replicated", false, true, "Database engine Replicated is now in Beta stage"}, - {"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"}, - {"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"}, - {"azure_allow_parallel_part_upload", "true", "true", "Use multiple threads for azure multipart upload."}, - {"max_recursive_cte_evaluation_depth", DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, "Maximum limit on recursive CTE evaluation depth"}, - {"query_plan_convert_outer_join_to_inner_join", false, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values"}, - }}, - {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, - {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, - {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, - {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, - {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, - {"default_table_engine", "None", 
"MergeTree", "Set default table engine to MergeTree for better usability"}, - {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, - {"traverse_shadow_remote_data_paths", false, false, "Traverse shadow directory when query system.remote_data_paths."}, - {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication is dependent materialized view cannot work together with async inserts."}, - {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, - {"log_processors_profiles", false, true, "Enable by default"}, - {"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."}, - {"allow_suspicious_primary_key", true, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. SimpleAggregateFunction)"}, - {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"}, - {"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"}, - {"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"}, - {"distributed_insert_skip_read_only_replicas", false, false, "If true, INSERT into Distributed will skip read-only replicas"}, - {"keeper_max_retries", 10, 10, "Max retries for general keeper operations"}, - {"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"}, - {"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"}, - {"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. 
It is experimental because it will be rewritten"}, - {"allow_experimental_analyzer", false, true, "Enable analyzer and planner by default."}, - {"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."}, - {"allow_get_client_http_header", false, false, "Introduced a new function."}, - {"output_format_pretty_row_numbers", false, true, "It is better for usability."}, - {"output_format_pretty_max_value_width_apply_for_single_value", true, false, "Single values in Pretty formats won't be cut."}, - {"output_format_parquet_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_orc_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_arrow_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. 
While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, - {"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, - {"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."}, - {"geo_distance_returns_float64_on_float64_arguments", false, true, "Increase the default precision."}, - {"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 
0 means unlimited."}, - {"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."}, - {"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."}, - {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, - {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, - {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, - }}, - {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, - {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, - {"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"}, - {"output_format_pretty_single_large_number_tip_threshold", 0, 1'000'000, "Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0)"}, - {"input_format_try_infer_exponent_floats", true, false, "Don't infer floats in exponential notation by default"}, - {"query_plan_optimize_prewhere", true, true, "Allow to push down filter to PREWHERE expression for supported storages"}, - {"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."}, - {"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"}, - 
{"async_insert_use_adaptive_busy_timeout", false, true, "Use adaptive asynchronous insert timeout"}, - {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"}, - {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"}, - {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"}, - {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}, - {"format_template_row_format", "", "", "Template row format string can be set directly in query"}, - {"format_template_resultset_format", "", "", "Template result set format string can be set in query"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}, - {"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."}, - {"min_external_table_block_size_rows", DEFAULT_INSERT_BLOCK_SIZE, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to external table to specified size in rows, if blocks are not big enough"}, - {"min_external_table_block_size_bytes", DEFAULT_INSERT_BLOCK_SIZE * 256, DEFAULT_INSERT_BLOCK_SIZE * 256, "Squash blocks passed to external table to specified size in bytes, if blocks are not big enough."}, - {"parallel_replicas_prefer_local_join", true, true, "If true, and JOIN can be executed with 
parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN."}, - {"optimize_time_filter_with_preimage", true, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')"}, - {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."}, - {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, - {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, - {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, - {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - }}, - {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, - {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, - {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, - {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, - {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"parallel_replicas_mark_segment_size", 
128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, - {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, - {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, - {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, - {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, - {"enable_vertical_final", false, true, "Use vertical final by default"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, - {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, - {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, - {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, - {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, - {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, - {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges 
into intersecting and non intersecting during FINAL optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, - {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, - {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, - {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, - {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, - {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, - {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, - {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, - {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, - {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, - {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, - {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, - {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, - {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. 
In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, - {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, - {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, - {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, - {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, - {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, - {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, - {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, - {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, - {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, - {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, - {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}}}, - {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, - {"input_format_native_allow_types_conversion", false, true, 
"Allow types conversion in Native input forma"}, - {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, - {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, - {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, - {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, - {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, - {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, - {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, - {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, - {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, - {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, - {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, - {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, - {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, - {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. 
For example, sorting steps related to ORDER BY clauses in subqueries"}}}, - {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, - {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, - {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, - {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"23.4", {{"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}}}, - {"23.4", {{"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, - {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, - {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, - {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, - {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, - {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, - {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, - {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for 
INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, - {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, - {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, - {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, - {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, - {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, - {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, - {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, - {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, - {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, - {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, - {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. 
It is significantly more convenient to use than sequential quorum inserts"}, - {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, - {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, - {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, - {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, - {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, - {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, - {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, - {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, - {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, - {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, - {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, - {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, - {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, -}; +const 
std::map & getSettingsChangesHistory(); } diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 05985316566..18034d846df 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -201,13 +201,13 @@ IMPLEMENT_SETTING_ENUM(ORCCompression, ErrorCodes::BAD_ARGUMENTS, {"zlib", FormatSettings::ORCCompression::ZLIB}, {"lz4", FormatSettings::ORCCompression::LZ4}}) -IMPLEMENT_SETTING_ENUM(S3QueueMode, ErrorCodes::BAD_ARGUMENTS, - {{"ordered", S3QueueMode::ORDERED}, - {"unordered", S3QueueMode::UNORDERED}}) +IMPLEMENT_SETTING_ENUM(ObjectStorageQueueMode, ErrorCodes::BAD_ARGUMENTS, + {{"ordered", ObjectStorageQueueMode::ORDERED}, + {"unordered", ObjectStorageQueueMode::UNORDERED}}) -IMPLEMENT_SETTING_ENUM(S3QueueAction, ErrorCodes::BAD_ARGUMENTS, - {{"keep", S3QueueAction::KEEP}, - {"delete", S3QueueAction::DELETE}}) +IMPLEMENT_SETTING_ENUM(ObjectStorageQueueAction, ErrorCodes::BAD_ARGUMENTS, + {{"keep", ObjectStorageQueueAction::KEEP}, + {"delete", ObjectStorageQueueAction::DELETE}}) IMPLEMENT_SETTING_ENUM(ExternalCommandStderrReaction, ErrorCodes::BAD_ARGUMENTS, {{"none", ExternalCommandStderrReaction::NONE}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 575cd8700c8..2d65bfc7463 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -341,21 +341,21 @@ DECLARE_SETTING_ENUM(ParallelReplicasCustomKeyFilterType) DECLARE_SETTING_ENUM(LocalFSReadMethod) -enum class S3QueueMode : uint8_t +enum class ObjectStorageQueueMode : uint8_t { ORDERED, UNORDERED, }; -DECLARE_SETTING_ENUM(S3QueueMode) +DECLARE_SETTING_ENUM(ObjectStorageQueueMode) -enum class S3QueueAction : uint8_t +enum class ObjectStorageQueueAction : uint8_t { KEEP, DELETE, }; -DECLARE_SETTING_ENUM(S3QueueAction) +DECLARE_SETTING_ENUM(ObjectStorageQueueAction) DECLARE_SETTING_ENUM(ExternalCommandStderrReaction) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index c711da45df2..b2c425ceb79 100644 --- 
a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -156,6 +157,12 @@ static void signalHandler(int sig, siginfo_t * info, void * context) const ucontext_t * signal_context = reinterpret_cast(context); const StackTrace stack_trace(*signal_context); +#if USE_GWP_ASAN + if (const auto fault_address = reinterpret_cast(info->si_addr); + GWPAsan::isGWPAsanError(fault_address)) + GWPAsan::printReport(fault_address); +#endif + writeBinary(sig, out); writePODBinary(*info, out); writePODBinary(signal_context, out); diff --git a/src/DataTypes/DataTypeCustomGeo.cpp b/src/DataTypes/DataTypeCustomGeo.cpp index f7d05fa3be6..0736d837d46 100644 --- a/src/DataTypes/DataTypeCustomGeo.cpp +++ b/src/DataTypes/DataTypeCustomGeo.cpp @@ -17,6 +17,13 @@ void registerDataTypeDomainGeo(DataTypeFactory & factory) std::make_unique(std::make_unique())); }); + // Custom type for simple line which consists from several segments. + factory.registerSimpleDataTypeCustom("LineString", [] + { + return std::make_pair(DataTypeFactory::instance().get("Array(Point)"), + std::make_unique(std::make_unique())); + }); + // Custom type for simple polygon without holes stored as Array(Point) factory.registerSimpleDataTypeCustom("Ring", [] { diff --git a/src/DataTypes/DataTypeCustomGeo.h b/src/DataTypes/DataTypeCustomGeo.h index c2a83b3e577..0a1c83e4638 100644 --- a/src/DataTypes/DataTypeCustomGeo.h +++ b/src/DataTypes/DataTypeCustomGeo.h @@ -11,6 +11,12 @@ public: DataTypePointName() : DataTypeCustomFixedName("Point") {} }; +class DataTypeLineStringName : public DataTypeCustomFixedName +{ +public: + DataTypeLineStringName() : DataTypeCustomFixedName("LineString") {} +}; + class DataTypeRingName : public DataTypeCustomFixedName { public: diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp index db252659d41..0ecb5370a7d 100644 --- a/src/DataTypes/DataTypeNullable.cpp +++ 
b/src/DataTypes/DataTypeNullable.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -174,4 +175,9 @@ DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type) } +bool canContainNull(const IDataType & type) +{ + return type.isNullable() || type.isLowCardinalityNullable() || isDynamic(type) || isVariant(type); +} + } diff --git a/src/DataTypes/DataTypeNullable.h b/src/DataTypes/DataTypeNullable.h index 71abe48c151..7a8a54fdf3a 100644 --- a/src/DataTypes/DataTypeNullable.h +++ b/src/DataTypes/DataTypeNullable.h @@ -62,4 +62,6 @@ DataTypePtr makeNullableOrLowCardinalityNullableSafe(const DataTypePtr & type); /// Nullable(T) -> T, LowCardinality(Nullable(T)) -> T DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type); +bool canContainNull(const IDataType & type); + } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 46c30240ef8..397ae3d8be9 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -543,6 +543,7 @@ template constexpr bool IsDataTypeNumber = false; template constexpr bool IsDataTypeDateOrDateTime = false; template constexpr bool IsDataTypeDate = false; template constexpr bool IsDataTypeEnum = false; +template constexpr bool IsDataTypeStringOrFixedString = false; template constexpr bool IsDataTypeDecimalOrNumber = IsDataTypeDecimal || IsDataTypeNumber; @@ -556,6 +557,8 @@ class DataTypeDate; class DataTypeDate32; class DataTypeDateTime; class DataTypeDateTime64; +class DataTypeString; +class DataTypeFixedString; template constexpr bool IsDataTypeDecimal> = true; @@ -572,6 +575,9 @@ template <> inline constexpr bool IsDataTypeDateOrDateTime = tru template <> inline constexpr bool IsDataTypeDateOrDateTime = true; template <> inline constexpr bool IsDataTypeDateOrDateTime = true; +template <> inline constexpr bool IsDataTypeStringOrFixedString = true; +template <> inline constexpr bool IsDataTypeStringOrFixedString = true; + template class 
DataTypeEnum; @@ -623,7 +629,7 @@ struct fmt::formatter } template - auto format(const DB::DataTypePtr & type, FormatContext & ctx) + auto format(const DB::DataTypePtr & type, FormatContext & ctx) const { return fmt::format_to(ctx.out(), "{}", type->getName()); } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 101a408a039..7a5227ca752 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -531,26 +531,98 @@ void SerializationTuple::serializeTextXML(const IColumn & column, size_t row_num void SerializationTuple::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - WriteBufferFromOwnString wb; - serializeText(column, row_num, wb, settings); - writeCSV(wb.str(), ostr); + if (settings.csv.serialize_tuple_into_separate_columns) + { + for (size_t i = 0; i < elems.size(); ++i) + { + if (i != 0) + writeChar(settings.csv.tuple_delimiter, ostr); + elems[i]->serializeTextCSV(extractElementColumn(column, i), row_num, ostr, settings); + } + } + else + { + WriteBufferFromOwnString wb; + serializeText(column, row_num, wb, settings); + writeCSV(wb.str(), ostr); + } } void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - String s; - readCSV(s, istr, settings.csv); - ReadBufferFromString rb(s); - deserializeText(column, rb, settings, true); + if (settings.csv.deserialize_separate_columns_into_tuple) + { + addElementSafe(elems.size(), column, [&] + { + const size_t size = elems.size(); + for (size_t i = 0; i < size; ++i) + { + if (i != 0) + { + skipWhitespaceIfAny(istr); + assertChar(settings.csv.tuple_delimiter, istr); + skipWhitespaceIfAny(istr); + } + + auto & element_column = extractElementColumn(column, i); + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column)) 
+ SerializationNullable::deserializeNullAsDefaultOrNestedTextCSV(element_column, istr, settings, elems[i]); + else + elems[i]->deserializeTextCSV(element_column, istr, settings); + } + return true; + }); + } + else + { + String s; + readCSV(s, istr, settings.csv); + ReadBufferFromString rb(s); + deserializeText(column, rb, settings, true); + } } bool SerializationTuple::tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - String s; - if (!tryReadCSV(s, istr, settings.csv)) - return false; - ReadBufferFromString rb(s); - return tryDeserializeText(column, rb, settings, true); + if (settings.csv.deserialize_separate_columns_into_tuple) + { + return addElementSafe(elems.size(), column, [&] + { + const size_t size = elems.size(); + for (size_t i = 0; i < size; ++i) + { + if (i != 0) + { + skipWhitespaceIfAny(istr); + if (!checkChar(settings.csv.tuple_delimiter, istr)) + return false; + skipWhitespaceIfAny(istr); + } + + auto & element_column = extractElementColumn(column, i); + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column)) + { + if (!SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextCSV(element_column, istr, settings, elems[i])) + return false; + } + else + { + if (!elems[i]->tryDeserializeTextCSV(element_column, istr, settings)) + return false; + } + } + + return true; + }); + } + else + { + String s; + if (!tryReadCSV(s, istr, settings.csv)) + return false; + ReadBufferFromString rb(s); + return tryDeserializeText(column, rb, settings, true); + } } struct SerializeBinaryBulkStateTuple : public ISerialization::SerializeBinaryBulkState diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 1f9a81ac671..ec0b4019c2f 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -146,7 +146,7 @@ void 
SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( } /// If we started to read a new column, reinitialize variant column in deserialization state. - if (!variant_element_state->variant || result_column->empty()) + if (!variant_element_state->variant || mutable_column->empty()) { variant_element_state->variant = mutable_column->cloneEmpty(); diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index 75a01a6190f..c85e8f5688a 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -30,8 +30,8 @@ namespace { friend void tryVisitNestedSelect(const String & query, DDLDependencyVisitorData & data); public: - DDLDependencyVisitorData(const ContextPtr & context_, const QualifiedTableName & table_name_, const ASTPtr & ast_) - : create_query(ast_), table_name(table_name_), current_database(context_->getCurrentDatabase()), context(context_) + DDLDependencyVisitorData(const ContextPtr & global_context_, const QualifiedTableName & table_name_, const ASTPtr & ast_, const String & current_database_) + : create_query(ast_), table_name(table_name_), default_database(global_context_->getCurrentDatabase()), current_database(current_database_), global_context(global_context_) { } @@ -71,8 +71,9 @@ namespace ASTPtr create_query; std::unordered_set skip_asts; QualifiedTableName table_name; + String default_database; String current_database; - ContextPtr context; + ContextPtr global_context; TableNamesSet dependencies; /// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query. @@ -95,6 +96,11 @@ namespace as_table.database = current_database; dependencies.emplace(as_table); } + + /// Visit nested select query only for views, for other cases it's not + /// an actual dependency as it will be executed only once to fill the table. 
+ if (create.select && !create.isView()) + skip_asts.insert(create.select); } /// The definition of a dictionary: SOURCE(CLICKHOUSE(...)) LAYOUT(...) LIFETIME(...) @@ -103,8 +109,8 @@ namespace if (!dictionary.source || dictionary.source->name != "clickhouse" || !dictionary.source->elements) return; - auto config = getDictionaryConfigurationFromAST(create_query->as(), context); - auto info = getInfoIfClickHouseDictionarySource(config, context); + auto config = getDictionaryConfigurationFromAST(create_query->as(), global_context); + auto info = getInfoIfClickHouseDictionarySource(config, global_context); /// We consider only dependencies on local tables. if (!info || !info->is_local) @@ -112,14 +118,21 @@ namespace if (!info->table_name.table.empty()) { + /// If database is not specified in dictionary source, use database of the dictionary itself, not the current/default database. if (info->table_name.database.empty()) - info->table_name.database = current_database; + info->table_name.database = table_name.database; dependencies.emplace(std::move(info->table_name)); } else { - /// We don't have a table name, we have a select query instead + /// We don't have a table name, we have a select query instead. + /// All tables from select query in dictionary definition won't + /// use current database, as this query is executed with global context. + /// Use default database from global context while visiting select query. 
+ String current_database_ = current_database; + current_database = default_database; tryVisitNestedSelect(info->query, *this); + current_database = current_database_; } } @@ -176,7 +189,7 @@ namespace if (auto cluster_name = tryGetClusterNameFromArgument(table_engine, 0)) { - auto cluster = context->tryGetCluster(*cluster_name); + auto cluster = global_context->tryGetCluster(*cluster_name); if (cluster && cluster->getLocalShardCount()) has_local_replicas = true; } @@ -231,7 +244,7 @@ namespace { if (auto cluster_name = tryGetClusterNameFromArgument(function, 0)) { - if (auto cluster = context->tryGetCluster(*cluster_name)) + if (auto cluster = global_context->tryGetCluster(*cluster_name)) { if (cluster->getLocalShardCount()) has_local_replicas = true; @@ -303,7 +316,10 @@ namespace try { /// We're just searching for dependencies here, it's not safe to execute subqueries now. - auto evaluated = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + /// Use copy of the global_context and set current database, because expressions can contain currentDatabase() function. 
+ ContextMutablePtr global_context_copy = Context::createCopy(global_context); + global_context_copy->setCurrentDatabase(current_database); + auto evaluated = evaluateConstantExpressionOrIdentifierAsLiteral(arg, global_context_copy); const auto * literal = evaluated->as(); if (!literal || (literal->value.getType() != Field::Types::String)) return {}; @@ -444,7 +460,7 @@ namespace ParserSelectWithUnionQuery parser; String description = fmt::format("Query for ClickHouse dictionary {}", data.table_name); String fixed_query = removeWhereConditionPlaceholder(query); - const Settings & settings = data.context->getSettingsRef(); + const Settings & settings = data.global_context->getSettingsRef(); ASTPtr select = parseQuery(parser, fixed_query, description, settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); @@ -459,12 +475,19 @@ namespace } -TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & context, const QualifiedTableName & table_name, const ASTPtr & ast) +TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & current_database) { - DDLDependencyVisitor::Data data{context, table_name, ast}; + DDLDependencyVisitor::Data data{global_global_context, table_name, ast, current_database}; DDLDependencyVisitor::Visitor visitor{data}; visitor.visit(ast); return std::move(data).getDependencies(); } +TableNamesSet getDependenciesFromDictionaryNestedSelectQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & select_query, const String & current_database) +{ + DDLDependencyVisitor::Data data{global_context, table_name, ast, current_database}; + tryVisitNestedSelect(select_query, data); + return std::move(data).getDependencies(); +} + } diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h index 29ea6298b04..400e6b04108 100644 --- 
a/src/Databases/DDLDependencyVisitor.h +++ b/src/Databases/DDLDependencyVisitor.h @@ -13,6 +13,9 @@ using TableNamesSet = std::unordered_set; /// Returns a list of all tables explicitly referenced in the create query of a specified table. /// For example, a column default expression can use dictGet() and thus reference a dictionary. /// Does not validate AST, works a best-effort way. -TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & context, const QualifiedTableName & table_name, const ASTPtr & ast); +TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & current_database); + +/// Returns a list of all tables explicitly referenced in the select query specified as a dictionary source. +TableNamesSet getDependenciesFromDictionaryNestedSelectQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast, const String & select_query, const String & current_database); } diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp index b8690125aaa..40234abb20f 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.cpp +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -110,19 +110,30 @@ void DDLLoadingDependencyVisitor::visit(const ASTFunctionWithKeyValueArguments & auto config = getDictionaryConfigurationFromAST(data.create_query->as(), data.global_context); auto info = getInfoIfClickHouseDictionarySource(config, data.global_context); - if (!info || !info->is_local || info->table_name.table.empty()) + if (!info || !info->is_local) return; - if (info->table_name.database.empty()) - info->table_name.database = data.default_database; - data.dependencies.emplace(std::move(info->table_name)); + if (!info->table_name.table.empty()) + { + /// If database is not specified in dictionary source, use database of the dictionary itself, not the current/default database. 
+ if (info->table_name.database.empty()) + info->table_name.database = data.table_name.database; + data.dependencies.emplace(std::move(info->table_name)); + } + else + { + /// We don't have a table name, we have a select query instead that will be executed during dictionary loading. + /// We need to find all tables used in this select query and add them to dependencies. + auto select_query_dependencies = getDependenciesFromDictionaryNestedSelectQuery(data.global_context, data.table_name, data.create_query, info->query, data.default_database); + data.dependencies.merge(select_query_dependencies); + } } void DDLLoadingDependencyVisitor::visit(const ASTStorage & storage, Data & data) { if (storage.ttl_table) { - auto ttl_dependensies = getDependenciesFromCreateQuery(data.global_context, data.table_name, storage.ttl_table->ptr()); + auto ttl_dependensies = getDependenciesFromCreateQuery(data.global_context, data.table_name, storage.ttl_table->ptr(), data.default_database); data.dependencies.merge(ttl_dependensies); } diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 8edc5b737a6..ccab72cfbae 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -1,20 +1,21 @@ +#include #include +#include #include #include -#include +#include #include #include -#include +#include +#include +#include +#include #include +#include +#include "Common/logger_useful.h" #include #include #include -#include -#include -#include -#include -#include -#include namespace fs = std::filesystem; @@ -393,6 +394,7 @@ DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables() { DetachedTables not_in_use; auto it = detached_tables.begin(); + LOG_DEBUG(log, "There are {} detached tables. 
Start searching non used tables.", detached_tables.size()); while (it != detached_tables.end()) { if (it->second.unique()) @@ -403,6 +405,7 @@ DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables() else ++it; } + LOG_DEBUG(log, "Found {} non used tables in detached tables.", not_in_use.size()); /// It should be destroyed in caller with released database mutex return not_in_use; } diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index e72834eddbe..233db07cd68 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -186,6 +186,7 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists.", backQuote(database_name), backQuote(table_name)); it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); + CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); } @@ -202,6 +203,7 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta if (it->second.expiration_iterator != cache_expiration_queue.end()) cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); + CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); } return res; diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index b82cf885b4a..86bf0471b8f 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -154,7 +154,7 @@ void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & tabl applyMetadataChangesToCreateQuery(it->second, metadata); /// The create query of the table has been just changed, we need to update dependencies too. 
- auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second); + auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second, local_context->getCurrentDatabase()); auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second); DatabaseCatalog::instance().updateDependencies(table_id, ref_dependencies, loading_dependencies); } diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 5cb4198e1a2..b8154372116 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -670,7 +670,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat for (auto it = metadata_files.begin(); it < metadata_files.end(); std::advance(it, batch_size)) { std::span batch{it, std::min(std::next(it, batch_size), metadata_files.end())}; - pool.scheduleOrThrowOnError( + pool.scheduleOrThrow( [batch, &process_metadata_file, &process_tmp_drop_metadata_file]() mutable { setThreadName("DatabaseOnDisk"); @@ -679,7 +679,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat process_metadata_file(file.first); else process_tmp_drop_metadata_file(file.first); - }); + }, Priority{}, getContext()->getSettingsRef().lock_acquire_timeout.totalMicroseconds()); } pool.wait(); } @@ -794,7 +794,7 @@ ASTPtr DatabaseOnDisk::getCreateQueryFromStorage(const String & table_name, cons throw_on_error); create_table_query->set(create_table_query->as()->comment, - std::make_shared("SYSTEM TABLE is built on the fly.")); + std::make_shared(storage->getInMemoryMetadata().comment)); return create_table_query; } diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 5d36f1cc3d6..7d4bb07e8ef 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ 
b/src/Databases/DatabaseOrdinary.cpp @@ -44,6 +44,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int UNKNOWN_DATABASE_ENGINE; extern const int NOT_IMPLEMENTED; + extern const int UNEXPECTED_NODE_IN_ZOOKEEPER; } static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768; @@ -76,6 +77,20 @@ static void setReplicatedEngine(ASTCreateQuery * create_query, ContextPtr contex String replica_path = server_settings.default_replica_path; String replica_name = server_settings.default_replica_name; + /// Check that replica path doesn't exist + Macros::MacroExpansionInfo info; + StorageID table_id = StorageID(create_query->getDatabase(), create_query->getTable(), create_query->uuid); + info.table_id = table_id; + info.expand_special_macros_only = false; + + String zookeeper_path = context->getMacros()->expand(replica_path, info); + if (context->getZooKeeper()->exists(zookeeper_path)) + throw Exception( + ErrorCodes::UNEXPECTED_NODE_IN_ZOOKEEPER, + "Found existing ZooKeeper path {} while trying to convert table {} to replicated. Table will not be converted.", + zookeeper_path, backQuote(table_id.getFullTableName()) + ); + auto args = std::make_shared(); args->children.push_back(std::make_shared(replica_path)); args->children.push_back(std::make_shared(replica_name)); @@ -524,7 +539,7 @@ void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & ta } /// The create query of the table has been just changed, we need to update dependencies too. 
- auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast); + auto ref_dependencies = getDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast, local_context->getCurrentDatabase()); auto loading_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast); DatabaseCatalog::instance().updateDependencies(table_id, ref_dependencies, loading_dependencies); diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index b91596a555d..4ca9afc49eb 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -65,6 +65,7 @@ static constexpr const char * REPLICATED_DATABASE_MARK = "DatabaseReplicated"; static constexpr const char * DROPPED_MARK = "DROPPED"; static constexpr const char * BROKEN_TABLES_SUFFIX = "_broken_tables"; static constexpr const char * BROKEN_REPLICATED_TABLES_SUFFIX = "_broken_replicated_tables"; +static constexpr const char * FIRST_REPLICA_DATABASE_NAME = "first_replica_database_name"; static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768; @@ -73,9 +74,10 @@ zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const return getContext()->getZooKeeper(); } -static inline String getHostID(ContextPtr global_context, const UUID & db_uuid) +static inline String getHostID(ContextPtr global_context, const UUID & db_uuid, bool secure) { - return Cluster::Address::toString(getFQDNOrHostName(), global_context->getTCPPort()) + ':' + toString(db_uuid); + UInt16 port = secure ? 
global_context->getTCPPortSecure().value_or(DBMS_DEFAULT_SECURE_PORT) : global_context->getTCPPort(); + return Cluster::Address::toString(getFQDNOrHostName(), port) + ':' + toString(db_uuid); } static inline UInt64 getMetadataHash(const String & table_name, const String & metadata) @@ -122,6 +124,13 @@ DatabaseReplicated::DatabaseReplicated( fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef()); replica_group_name = context_->getConfigRef().getString("replica_group_name", ""); + + if (!replica_group_name.empty() && database_name.starts_with(DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX)) + { + context_->addWarningMessage(fmt::format("There's a Replicated database with a name starting from '{}', " + "and replica_group_name is configured. It may cause collisions in cluster names.", + ALL_GROUPS_CLUSTER_PREFIX)); + } } String DatabaseReplicated::getFullReplicaName(const String & shard, const String & replica) @@ -173,13 +182,40 @@ ClusterPtr DatabaseReplicated::tryGetCluster() const return cluster; } -void DatabaseReplicated::setCluster(ClusterPtr && new_cluster) +ClusterPtr DatabaseReplicated::tryGetAllGroupsCluster() const { std::lock_guard lock{mutex}; - cluster = std::move(new_cluster); + if (replica_group_name.empty()) + return nullptr; + + if (cluster_all_groups) + return cluster_all_groups; + + /// Database is probably not created or not initialized yet, it's ok to return nullptr + if (is_readonly) + return cluster_all_groups; + + try + { + cluster_all_groups = getClusterImpl(/*all_groups*/ true); + } + catch (...) 
+ { + tryLogCurrentException(log); + } + return cluster_all_groups; } -ClusterPtr DatabaseReplicated::getClusterImpl() const +void DatabaseReplicated::setCluster(ClusterPtr && new_cluster, bool all_groups) +{ + std::lock_guard lock{mutex}; + if (all_groups) + cluster_all_groups = std::move(new_cluster); + else + cluster = std::move(new_cluster); +} + +ClusterPtr DatabaseReplicated::getClusterImpl(bool all_groups) const { Strings unfiltered_hosts; Strings hosts; @@ -199,17 +235,24 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const "It's possible if the first replica is not fully created yet " "or if the last replica was just dropped or due to logical error", zookeeper_path); - hosts.clear(); - std::vector paths; - for (const auto & host : unfiltered_hosts) - paths.push_back(zookeeper_path + "/replicas/" + host + "/replica_group"); - - auto replica_groups = zookeeper->tryGet(paths); - - for (size_t i = 0; i < paths.size(); ++i) + if (all_groups) { - if (replica_groups[i].data == replica_group_name) - hosts.push_back(unfiltered_hosts[i]); + hosts = unfiltered_hosts; + } + else + { + hosts.clear(); + std::vector paths; + for (const auto & host : unfiltered_hosts) + paths.push_back(zookeeper_path + "/replicas/" + host + "/replica_group"); + + auto replica_groups = zookeeper->tryGet(paths); + + for (size_t i = 0; i < paths.size(); ++i) + { + if (replica_groups[i].data == replica_group_name) + hosts.push_back(unfiltered_hosts[i]); + } } Int32 cversion = stat.cversion; @@ -274,6 +317,11 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const bool treat_local_as_remote = false; bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL; + + String cluster_name = TSA_SUPPRESS_WARNING_FOR_READ(database_name); /// FIXME + if (all_groups) + cluster_name = ALL_GROUPS_CLUSTER_PREFIX + cluster_name; + ClusterConnectionParameters params{ cluster_auth_info.cluster_username, cluster_auth_info.cluster_password, @@ -282,7 +330,7 @@ 
ClusterPtr DatabaseReplicated::getClusterImpl() const treat_local_port_as_remote, cluster_auth_info.cluster_secure_connection, Priority{1}, - TSA_SUPPRESS_WARNING_FOR_READ(database_name), /// FIXME + cluster_name, cluster_auth_info.cluster_secret}; return std::make_shared(getContext()->getSettingsRef(), shards, params); @@ -369,8 +417,10 @@ void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessL return; } - String host_id = getHostID(getContext(), db_uuid); - if (is_create_query || replica_host_id != host_id) + String host_id = getHostID(getContext(), db_uuid, cluster_auth_info.cluster_secure_connection); + String host_id_default = getHostID(getContext(), db_uuid, false); + + if (is_create_query || (replica_host_id != host_id && replica_host_id != host_id_default)) { throw Exception( ErrorCodes::REPLICA_ALREADY_EXISTS, @@ -378,6 +428,14 @@ void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessL replica_name, shard_name, zookeeper_path, replica_host_id, host_id); } + /// Before 24.6 we always created host_id with insecure port, even if cluster_auth_info.cluster_secure_connection was true. + /// So not to break compatibility, we need to update host_id to secure one if cluster_auth_info.cluster_secure_connection is true. + if (host_id != host_id_default && replica_host_id == host_id_default) + { + current_zookeeper->set(replica_path, host_id, -1); + createEmptyLogEntry(current_zookeeper); + } + /// Check that replica_group_name in ZooKeeper matches the local one and change it if necessary. String zk_replica_group_name; if (!current_zookeeper->tryGet(replica_path + "/replica_group", zk_replica_group_name)) @@ -408,6 +466,13 @@ void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessL return; } + /// If not exist, create a node with the database name for introspection. 
+ /// Technically, the database may have different names on different replicas, but this is not a usual case and we only save the first one + auto db_name_path = fs::path(zookeeper_path) / FIRST_REPLICA_DATABASE_NAME; + auto error_code = current_zookeeper->trySet(db_name_path, getDatabaseName()); + if (error_code == Coordination::Error::ZNONODE) + current_zookeeper->tryCreate(db_name_path, getDatabaseName(), zkutil::CreateMode::Persistent); + is_readonly = false; } catch (...) @@ -504,7 +569,7 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt "already contains some data and it does not look like Replicated database path.", zookeeper_path); /// Write host name to replica_path, it will protect from multiple replicas with the same name - auto host_id = getHostID(getContext(), db_uuid); + auto host_id = getHostID(getContext(), db_uuid, cluster_auth_info.cluster_secure_connection); for (int attempts = 10; attempts > 0; --attempts) { @@ -1100,7 +1165,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep /// And QualifiedTableName::parseFromString doesn't handle this. 
auto qualified_name = QualifiedTableName{.database = getDatabaseName(), .table = table_name}; auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, create_table_query); - tables_dependencies.addDependencies(qualified_name, getDependenciesFromCreateQuery(getContext(), qualified_name, query_ast)); + tables_dependencies.addDependencies(qualified_name, getDependenciesFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ast, getContext()->getCurrentDatabase())); } tables_dependencies.checkNoCyclicDependencies(); @@ -1325,6 +1390,13 @@ void DatabaseReplicated::drop(ContextPtr context_) } } +void DatabaseReplicated::renameDatabase(ContextPtr query_context, const String & new_name) +{ + DatabaseAtomic::renameDatabase(query_context, new_name); + auto db_name_path = fs::path(zookeeper_path) / FIRST_REPLICA_DATABASE_NAME; + getZooKeeper()->set(db_name_path, getDatabaseName()); +} + void DatabaseReplicated::stopReplication() { if (ddl_worker) diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 55bcf963d37..eab5b2ff931 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -20,6 +20,8 @@ using ClusterPtr = std::shared_ptr; class DatabaseReplicated : public DatabaseAtomic { public: + static constexpr auto ALL_GROUPS_CLUSTER_PREFIX = "all_groups."; + DatabaseReplicated(const String & name_, const String & metadata_path_, UUID uuid, const String & zookeeper_path_, const String & shard_name_, const String & replica_name_, DatabaseReplicatedSettings db_settings_, @@ -65,6 +67,7 @@ public: /// Returns cluster consisting of database replicas ClusterPtr tryGetCluster() const; + ClusterPtr tryGetAllGroupsCluster() const; void drop(ContextPtr /*context*/) override; @@ -83,6 +86,8 @@ public: std::vector tryGetAreReplicasActive(const ClusterPtr & cluster_) const; + void renameDatabase(ContextPtr query_context, const String & new_name) override; + friend struct DatabaseReplicatedTask; 
friend class DatabaseReplicatedDDLWorker; private: @@ -113,8 +118,8 @@ private: ASTPtr parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query); String readMetadataFile(const String & table_name) const; - ClusterPtr getClusterImpl() const; - void setCluster(ClusterPtr && new_cluster); + ClusterPtr getClusterImpl(bool all_groups = false) const; + void setCluster(ClusterPtr && new_cluster, bool all_groups = false); void createEmptyLogEntry(const ZooKeeperPtr & current_zookeeper); @@ -155,6 +160,7 @@ private: UInt64 tables_metadata_digest TSA_GUARDED_BY(metadata_mutex); mutable ClusterPtr cluster; + mutable ClusterPtr cluster_all_groups; LoadTaskPtr startup_replicated_database_task TSA_GUARDED_BY(mutex); }; diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 6e19a77c501..31d6f7876a8 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -421,6 +421,8 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na { /// Some replica is added or removed, let's update cached cluster database->setCluster(database->getClusterImpl()); + if (!database->replica_group_name.empty()) + database->setCluster(database->getClusterImpl(/*all_groups*/ true), /*all_groups*/ true); out_reason = fmt::format("Entry {} is a dummy task", entry_name); return {}; } diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 5fee14ecc2a..6426123bb4f 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -41,11 +41,11 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot alter table {} because it was created AS table function" " and doesn't have structure in metadata", backQuote(ast_create_query.getTable())); - if (!has_structure && !ast_create_query.is_dictionary) + if (!has_structure && 
!ast_create_query.is_dictionary && !ast_create_query.isParameterizedView()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot alter table {} metadata doesn't have structure", backQuote(ast_create_query.getTable())); - if (!ast_create_query.is_dictionary) + if (!ast_create_query.is_dictionary && !ast_create_query.isParameterizedView()) { ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns); ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices); @@ -260,7 +260,9 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n res = it->second; tables.erase(it); res->is_detached = true; - CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); + + if (res->isSystemStorage() == false) + CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); auto table_id = res->getStorageID(); if (table_id.hasUUID()) @@ -301,7 +303,9 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// It is important to reset is_detached here since in case of RENAME in /// non-Atomic database the is_detached is set to true before RENAME. 
table->is_detached = false; - CurrentMetrics::add(getAttachedCounterForStorage(table), 1); + + if (table->isSystemStorage() == false && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE) + CurrentMetrics::add(getAttachedCounterForStorage(table), 1); } void DatabaseWithOwnTablesBase::shutdown() diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index 6aa13b7b759..733e5d53981 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -137,7 +137,7 @@ void TablesLoader::buildDependencyGraph() { for (const auto & [table_name, table_metadata] : metadata.parsed_tables) { - auto new_ref_dependencies = getDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast); + auto new_ref_dependencies = getDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast, global_context->getCurrentDatabase()); auto new_loading_dependencies = getLoadingDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast); if (!new_ref_dependencies.empty()) diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index 2842e2b8799..1816324a93b 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -511,7 +511,10 @@ MutableColumns CacheDictionary::aggregateColumns( if (default_mask) { if (key_state_from_storage.isDefault()) + { (*default_mask)[key_index] = 1; + aggregated_column->insertDefault(); + } else { (*default_mask)[key_index] = 0; diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index 933ffa04069..2c0f7653aff 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -175,8 +175,7 @@ Columns DirectDictionary::getColumns( if (!mask_filled) (*default_mask)[requested_key_index] = 1; - Field value{}; - result_column->insert(value); + result_column->insertDefault(); } else { diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h 
index 9b575c65bce..f06f5ba8e17 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -355,6 +355,8 @@ public: { return delegate->getS3StorageClient(); } + + std::shared_ptr tryGetS3StorageClient() const override { return delegate->tryGetS3StorageClient(); } #endif private: diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp index 5f0ca850b40..4bbefad5290 100644 --- a/src/Disks/IDisk.cpp +++ b/src/Disks/IDisk.cpp @@ -186,7 +186,7 @@ void IDisk::checkAccess() DB::UUID server_uuid = DB::ServerUUID::get(); if (server_uuid == DB::UUIDHelpers::Nil) throw Exception(ErrorCodes::LOGICAL_ERROR, "Server UUID is not initialized"); - const String path = fmt::format("clickhouse_access_check_{}", DB::toString(server_uuid)); + const String path = fmt::format("clickhouse_access_check_{}", toString(server_uuid)); checkAccessImpl(path); } diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 658acb01c74..412ad27e94f 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -427,7 +427,7 @@ public: /// Device: 10301h/66305d Inode: 3109907 Links: 1 /// Why we have always zero by default? Because normal filesystem /// manages hardlinks by itself. So you can always remove hardlink and all - /// other alive harlinks will not be removed. + /// other alive hardlinks will not be removed. virtual UInt32 getRefCount(const String &) const { return 0; } /// Revision is an incremental counter of disk operation. 
@@ -478,6 +478,8 @@ public: "Method getS3StorageClient() is not implemented for disk type: {}", getDataSourceDescription().toString()); } + + virtual std::shared_ptr tryGetS3StorageClient() const { return nullptr; } #endif diff --git a/src/Disks/IO/IOUringReader.cpp b/src/Disks/IO/IOUringReader.cpp index 6b0e3f8cc89..b0e783e11d9 100644 --- a/src/Disks/IO/IOUringReader.cpp +++ b/src/Disks/IO/IOUringReader.cpp @@ -22,7 +22,8 @@ namespace ProfileEvents extern const Event AsynchronousReaderIgnoredBytes; extern const Event IOUringSQEsSubmitted; - extern const Event IOUringSQEsResubmits; + extern const Event IOUringSQEsResubmitsAsync; + extern const Event IOUringSQEsResubmitsSync; extern const Event IOUringCQEsCompleted; extern const Event IOUringCQEsFailed; } @@ -149,10 +150,12 @@ int IOUringReader::submitToRing(EnqueuedRequest & enqueued) io_uring_prep_read(sqe, fd, request.buf, static_cast(request.size - enqueued.bytes_read), request.offset + enqueued.bytes_read); int ret = 0; - do + ret = io_uring_submit(&ring); + while (ret == -EINTR || ret == -EAGAIN) { + ProfileEvents::increment(ProfileEvents::IOUringSQEsResubmitsSync); ret = io_uring_submit(&ring); - } while (ret == -EINTR || ret == -EAGAIN); + } if (ret > 0 && !enqueued.resubmitting) { @@ -266,7 +269,7 @@ void IOUringReader::monitorRing() if (cqe->res == -EAGAIN || cqe->res == -EINTR) { enqueued.resubmitting = true; - ProfileEvents::increment(ProfileEvents::IOUringSQEsResubmits); + ProfileEvents::increment(ProfileEvents::IOUringSQEsResubmitsAsync); ret = submitToRing(enqueued); if (ret <= 0) @@ -310,6 +313,7 @@ void IOUringReader::monitorRing() // potential short read, re-submit enqueued.resubmitting = true; enqueued.bytes_read += bytes_read; + ProfileEvents::increment(ProfileEvents::IOUringSQEsResubmitsAsync); ret = submitToRing(enqueued); if (ret <= 0) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 2c90e3a9003..d1324e22978 100644 --- 
a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -14,16 +14,29 @@ namespace ProfileEvents { extern const Event RemoteWriteThrottlerBytes; extern const Event RemoteWriteThrottlerSleepMicroseconds; + + extern const Event AzureUpload; + extern const Event AzureStageBlock; + extern const Event AzureCommitBlockList; + + extern const Event DiskAzureUpload; + extern const Event DiskAzureStageBlock; + extern const Event DiskAzureCommitBlockList; + } namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + struct WriteBufferFromAzureBlobStorage::PartData { Memory<> memory; size_t data_size = 0; - std::string block_id; }; BufferAllocationPolicyPtr createBufferAllocationPolicy(const AzureObjectStorageSettings & settings) @@ -119,22 +132,34 @@ void WriteBufferFromAzureBlobStorage::preFinalize() // This function should not be run again is_prefinalized = true; + hidePartialData(); + + if (hidden_size > 0) + detachBuffer(); + + setFakeBufferWhenPreFinalized(); + /// If there is only one block and size is less than or equal to max_single_part_upload_size /// then we use single part upload instead of multi part upload - if (buffer_allocation_policy->getBufferNumber() == 1) + if (block_ids.empty() && detached_part_data.size() == 1 && detached_part_data.front().data_size <= max_single_part_upload_size) { - size_t data_size = size_t(position() - memory.data()); - if (data_size <= max_single_part_upload_size) - { - auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); - Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(memory.data()), data_size); - execWithRetry([&](){ block_blob_client.Upload(memory_stream); }, max_unexpected_write_error_retries, data_size); - LOG_TRACE(log, "Committed single block for blob `{}`", blob_path); - return; - } - } + ProfileEvents::increment(ProfileEvents::AzureUpload); + if 
(blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureUpload); - writePart(); + auto part_data = std::move(detached_part_data.front()); + auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(part_data.memory.data()), part_data.data_size); + execWithRetry([&](){ block_blob_client.Upload(memory_stream); }, max_unexpected_write_error_retries, part_data.data_size); + LOG_TRACE(log, "Committed single block for blob `{}`", blob_path); + + detached_part_data.pop_front(); + return; + } + else + { + writeMultipartUpload(); + } } void WriteBufferFromAzureBlobStorage::finalizeImpl() @@ -144,10 +169,18 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() if (!is_prefinalized) preFinalize(); + chassert(offset() == 0); + chassert(hidden_size == 0); + + task_tracker->waitAll(); + if (!block_ids.empty()) { - task_tracker->waitAll(); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + ProfileEvents::increment(ProfileEvents::AzureCommitBlockList); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureCommitBlockList); + execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); } @@ -155,14 +188,66 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() void WriteBufferFromAzureBlobStorage::nextImpl() { + if (is_prefinalized) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot write to prefinalized buffer for Azure Blob Storage, the file could have been created"); + task_tracker->waitIfAny(); - writePart(); + + hidePartialData(); + + reallocateFirstBuffer(); + + if (available() > 0) + return; + + detachBuffer(); + + if (detached_part_data.size() > 1) + writeMultipartUpload(); + allocateBuffer(); } 
+void WriteBufferFromAzureBlobStorage::hidePartialData() +{ + if (write_settings.remote_throttler) + write_settings.remote_throttler->add(offset(), ProfileEvents::RemoteWriteThrottlerBytes, ProfileEvents::RemoteWriteThrottlerSleepMicroseconds); + + chassert(memory.size() >= hidden_size + offset()); + + hidden_size += offset(); + chassert(memory.data() + hidden_size == working_buffer.begin() + offset()); + chassert(memory.data() + hidden_size == position()); + + WriteBuffer::set(memory.data() + hidden_size, memory.size() - hidden_size); + chassert(offset() == 0); +} + +void WriteBufferFromAzureBlobStorage::reallocateFirstBuffer() +{ + chassert(offset() == 0); + + if (buffer_allocation_policy->getBufferNumber() > 1 || available() > 0) + return; + + const size_t max_first_buffer = buffer_allocation_policy->getBufferSize(); + if (memory.size() == max_first_buffer) + return; + + size_t size = std::min(memory.size() * 2, max_first_buffer); + memory.resize(size); + + WriteBuffer::set(memory.data() + hidden_size, memory.size() - hidden_size); + chassert(offset() == 0); +} + void WriteBufferFromAzureBlobStorage::allocateBuffer() { buffer_allocation_policy->nextBuffer(); + chassert(0 == hidden_size); + auto size = buffer_allocation_policy->getBufferSize(); if (buffer_allocation_policy->getBufferNumber() == 1) @@ -172,30 +257,60 @@ void WriteBufferFromAzureBlobStorage::allocateBuffer() WriteBuffer::set(memory.data(), memory.size()); } -void WriteBufferFromAzureBlobStorage::writePart() +void WriteBufferFromAzureBlobStorage::detachBuffer() { - auto data_size = size_t(position() - memory.data()); + size_t data_size = size_t(position() - memory.data()); if (data_size == 0) return; - const std::string & block_id = block_ids.emplace_back(getRandomASCIIString(64)); - std::shared_ptr part_data = std::make_shared(std::move(memory), data_size, block_id); - WriteBuffer::set(nullptr, 0); + chassert(data_size == hidden_size); - auto upload_worker = [this, part_data] () + auto buf = 
std::move(memory); + + WriteBuffer::set(nullptr, 0); + total_size += hidden_size; + hidden_size = 0; + + detached_part_data.push_back({std::move(buf), data_size}); + WriteBuffer::set(nullptr, 0); +} + +void WriteBufferFromAzureBlobStorage::writePart(WriteBufferFromAzureBlobStorage::PartData && part_data) +{ + const std::string & block_id = block_ids.emplace_back(getRandomASCIIString(64)); + auto worker_data = std::make_shared>(block_id, std::move(part_data)); + + auto upload_worker = [this, worker_data] () { + auto & data_size = std::get<1>(*worker_data).data_size; + auto & data_block_id = std::get<0>(*worker_data); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); - Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(part_data->memory.data()), part_data->data_size); - execWithRetry([&](){ block_blob_client.StageBlock(part_data->block_id, memory_stream); }, max_unexpected_write_error_retries, part_data->data_size); + ProfileEvents::increment(ProfileEvents::AzureStageBlock); + if (blob_container_client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureStageBlock); - if (write_settings.remote_throttler) - write_settings.remote_throttler->add(part_data->data_size, ProfileEvents::RemoteWriteThrottlerBytes, ProfileEvents::RemoteWriteThrottlerSleepMicroseconds); + Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(std::get<1>(*worker_data).memory.data()), data_size); + execWithRetry([&](){ block_blob_client.StageBlock(data_block_id, memory_stream); }, max_unexpected_write_error_retries, data_size); }; task_tracker->add(std::move(upload_worker)); } +void WriteBufferFromAzureBlobStorage::setFakeBufferWhenPreFinalized() +{ + WriteBuffer::set(fake_buffer_when_prefinalized, sizeof(fake_buffer_when_prefinalized)); +} + +void WriteBufferFromAzureBlobStorage::writeMultipartUpload() +{ + while (!detached_part_data.empty()) + { + writePart(std::move(detached_part_data.front())); + 
detached_part_data.pop_front(); + } +} + } #endif diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 3da6d843991..10fe871a727 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -48,8 +48,13 @@ public: private: struct PartData; - void writePart(); + void writeMultipartUpload(); + void writePart(PartData && part_data); + void detachBuffer(); + void reallocateFirstBuffer(); void allocateBuffer(); + void hidePartialData(); + void setFakeBufferWhenPreFinalized(); void finalizeImpl() override; void execWithRetry(std::function func, size_t num_tries, size_t cost = 0); @@ -77,9 +82,16 @@ private: MemoryBufferPtr allocateBuffer() const; + char fake_buffer_when_prefinalized[1] = {}; + bool first_buffer=true; + size_t total_size = 0; + size_t hidden_size = 0; + std::unique_ptr task_tracker; + + std::deque detached_part_data; }; } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index bae58f0b9c6..1a5388349f8 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -181,7 +181,7 @@ std::unique_ptr getAzureBlobStorageClientWithAuth( if (config.getBool(config_prefix + ".use_workload_identity", false)) { auto workload_identity_credential = std::make_shared(); - return std::make_unique(url, workload_identity_credential); + return std::make_unique(url, workload_identity_credential, client_options); } auto managed_identity_credential = std::make_shared(); diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index e7ecf7cd515..0ebe885a3e7 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ 
b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -60,7 +60,6 @@ public: "ListObjectAzure") , client(client_) { - options.Prefix = path_prefix; options.PageSizeHint = static_cast(max_list_size); } @@ -127,25 +126,22 @@ bool AzureObjectStorage::exists(const StoredObject & object) const { auto client_ptr = client.get(); - /// What a shame, no Exists method... - Azure::Storage::Blobs::ListBlobsOptions options; - options.Prefix = object.remote_path; - options.PageSizeHint = 1; - - ProfileEvents::increment(ProfileEvents::AzureListObjects); + ProfileEvents::increment(ProfileEvents::AzureGetProperties); if (client_ptr->GetClickhouseOptions().IsClientForDisk) - ProfileEvents::increment(ProfileEvents::DiskAzureListObjects); + ProfileEvents::increment(ProfileEvents::DiskAzureGetProperties); - auto blobs_list_response = client_ptr->ListBlobs(options); - auto blobs_list = blobs_list_response.Blobs; - - for (const auto & blob : blobs_list) + try { - if (object.remote_path == blob.Name) - return true; + auto blob_client = client_ptr->GetBlobClient(object.remote_path); + blob_client.GetProperties(); + return true; + } + catch (const Azure::Storage::StorageException & e) + { + if (e.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) + return false; + throw; } - - return false; } ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const @@ -153,14 +149,16 @@ ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_pr auto settings_ptr = settings.get(); auto client_ptr = client.get(); - return std::make_shared(path_prefix, client_ptr, max_keys); + return std::make_shared(path_prefix, client_ptr, max_keys ? max_keys : settings_ptr->list_object_keys_size); } void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const { auto client_ptr = client.get(); - /// What a shame, no Exists method... 
+ /// NOTE: list doesn't work if endpoint contains non-empty prefix for blobs. + /// See AzureBlobStorageEndpoint and processAzureBlobStorageEndpoint for details. + Azure::Storage::Blobs::ListBlobsOptions options; options.Prefix = path; if (max_keys) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 8ead696cf78..c342929d656 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -166,6 +166,8 @@ public: return client.get(); } + bool supportParallelWrite() const override { return true; } + private: using SharedAzureClientPtr = std::shared_ptr; void removeObjectImpl(const StoredObject & object, const SharedAzureClientPtr & client_ptr, bool if_exists); diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 6a5a75c08f0..727dbeed853 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -138,6 +138,11 @@ public: { return object_storage->getS3StorageClient(); } + + std::shared_ptr tryGetS3StorageClient() override + { + return object_storage->tryGetS3StorageClient(); + } #endif private: diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 5803a985000..4de6d78e952 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -587,6 +587,11 @@ std::shared_ptr DiskObjectStorage::getS3StorageClient() const { return object_storage->getS3StorageClient(); } + +std::shared_ptr DiskObjectStorage::tryGetS3StorageClient() const +{ + return object_storage->tryGetS3StorageClient(); +} #endif DiskPtr DiskObjectStorageReservation::getDisk(size_t i) const diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h 
b/src/Disks/ObjectStorages/DiskObjectStorage.h index ffef0a007da..59cc82d8c81 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -214,6 +214,7 @@ public: #if USE_AWS_S3 std::shared_ptr getS3StorageClient() const override; + std::shared_ptr tryGetS3StorageClient() const override; #endif private: diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 7bc9e4073db..6410a9a7a73 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -75,6 +75,7 @@ struct RelativePathWithMetadata virtual std::string getPath() const { return relative_path; } virtual bool isArchive() const { return false; } virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } + virtual size_t fileSizeInArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } }; struct ObjectKeyWithMetadata @@ -127,8 +128,10 @@ public: /// /, /a, /a/b, /a/b/c, /a/b/c/d while exists will return true only for /a/b/c/d virtual bool existsOrHasAnyChild(const std::string & path) const; + /// List objects recursively by certain prefix. virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const; + /// List objects recursively by certain prefix. Use it instead of listObjects, if you want to list objects lazily. virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const; /// Get object metadata if supported. 
It should be possible to receive @@ -269,6 +272,7 @@ public: { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for S3ObjectStorage"); } + virtual std::shared_ptr tryGetS3StorageClient() { return nullptr; } #endif diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index f290a762205..66da0f2431e 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -22,8 +22,7 @@ using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr +#include #include +#include +#include #include #include #include "CommonPathPrefixKeyGenerator.h" + namespace DB { @@ -22,34 +26,78 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri { MetadataStorageFromPlainObjectStorage::PathMap result; - RelativePathsWithMetadata files; - object_storage->listObjects(root, files, 0); - for (const auto & file : files) + ThreadPool & pool = getIOThreadPool().get(); + ThreadPoolCallbackRunnerLocal runner(pool, "PlainRWMetaLoad"); + std::mutex mutex; + + LoggerPtr log = getLogger("MetadataStorageFromPlainObjectStorage"); + + ReadSettings settings; + settings.enable_filesystem_cache = false; + settings.remote_fs_method = RemoteFSReadMethod::read; + settings.remote_fs_buffer_size = 1024; /// These files are small. 
+ + LOG_DEBUG(log, "Loading metadata"); + size_t num_files = 0; + for (auto iterator = object_storage->iterate(root, 0); iterator->isValid(); iterator->next()) { - auto remote_path = std::filesystem::path(file->relative_path); + ++num_files; + auto file = iterator->current(); + String path = file->getPath(); + auto remote_path = std::filesystem::path(path); if (remote_path.filename() != PREFIX_PATH_FILE_NAME) continue; - StoredObject object{file->relative_path}; + runner([remote_path, path, &object_storage, &result, &mutex, &log, &settings] + { + setThreadName("PlainRWMetaLoad"); - auto read_buf = object_storage->readObject(object); - String local_path; - readStringUntilEOF(local_path, *read_buf); + StoredObject object{path}; + String local_path; - chassert(remote_path.has_parent_path()); - auto res = result.emplace(local_path, remote_path.parent_path()); + try + { + auto read_buf = object_storage->readObject(object, settings); + readStringUntilEOF(local_path, *read_buf); + } +#if USE_AWS_S3 + catch (const S3Exception & e) + { + /// It is ok if a directory was removed just now. + /// We support attaching a filesystem that is concurrently modified by someone else. + if (e.getS3ErrorCode() == Aws::S3::S3Errors::NO_SUCH_KEY) + return; + throw; + } +#endif + catch (...) + { + throw; + } - /// This can happen if table replication is enabled, then the same local path is written - /// in `prefix.path` of each replica. - /// TODO: should replicated tables (e.g., RMT) be explicitly disallowed? 
- if (!res.second) - LOG_WARNING( - getLogger("MetadataStorageFromPlainObjectStorage"), - "The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'", - local_path, - res.first->second, - remote_path.parent_path().string()); + chassert(remote_path.has_parent_path()); + std::pair res; + { + std::lock_guard lock(mutex); + res = result.emplace(local_path, remote_path.parent_path()); + } + + /// This can happen if table replication is enabled, then the same local path is written + /// in `prefix.path` of each replica. + /// TODO: should replicated tables (e.g., RMT) be explicitly disallowed? + if (!res.second) + LOG_WARNING( + log, + "The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'", + local_path, + res.first->second, + remote_path.parent_path().string()); + }); } + + runner.waitForAllToFinishAndRethrowFirstError(); + LOG_DEBUG(log, "Loaded metadata for {} files, found {} directories", num_files, result.size()); + auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; CurrentMetrics::add(metric, result.size()); return result; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h index 661968d7044..a5394b9428d 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h @@ -4,6 +4,7 @@ #include + namespace DB { diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 8210255decb..1bf8250adff 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -172,6 +172,14 @@ void checkS3Capabilities( } } +static std::string getEndpoint( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const ContextPtr & context) +{ + return 
context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); +} + void registerS3ObjectStorage(ObjectStorageFactory & factory) { static constexpr auto disk_type = "s3"; @@ -185,8 +193,9 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory) { auto uri = getS3URI(config, config_prefix, context); auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); - auto settings = getSettings(config, config_prefix, context); - auto client = getClient(config, config_prefix, context, *settings, true); + auto endpoint = getEndpoint(config, config_prefix, context); + auto settings = getSettings(config, config_prefix, context, endpoint, /* validate_settings */true); + auto client = getClient(endpoint, *settings, context, /* for_disk_s3 */true); auto key_generator = getKeyGenerator(uri, config, config_prefix); auto object_storage = createObjectStorage( @@ -221,8 +230,9 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory) auto uri = getS3URI(config, config_prefix, context); auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); - auto settings = getSettings(config, config_prefix, context); - auto client = getClient(config, config_prefix, context, *settings, true); + auto endpoint = getEndpoint(config, config_prefix, context); + auto settings = getSettings(config, config_prefix, context, endpoint, /* validate_settings */true); + auto client = getClient(endpoint, *settings, context, /* for_disk_s3 */true); auto key_generator = getKeyGenerator(uri, config, config_prefix); auto object_storage = std::make_shared>( @@ -255,8 +265,9 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory) auto uri = getS3URI(config, config_prefix, context); auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); - auto settings = getSettings(config, config_prefix, context); - auto client = getClient(config, config_prefix, context, *settings, true); + auto endpoint = getEndpoint(config, 
config_prefix, context); + auto settings = getSettings(config, config_prefix, context, endpoint, /* validate_settings */true); + auto client = getClient(endpoint, *settings, context, /* for_disk_s3 */true); auto key_generator = getKeyGenerator(uri, config, config_prefix); auto metadata_storage_metrics = DB::MetadataStorageMetrics::create(); diff --git a/src/Disks/ObjectStorages/ObjectStorageIterator.h b/src/Disks/ObjectStorages/ObjectStorageIterator.h index 26c3c690ba5..d814514ddcc 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIterator.h +++ b/src/Disks/ObjectStorages/ObjectStorageIterator.h @@ -9,15 +9,34 @@ namespace DB class IObjectStorageIterator { public: + /// Moves iterator to the next element. If the iterator not isValid, the behavior is undefined. virtual void next() = 0; - virtual void nextBatch() = 0; + + /// Check if the iterator is valid, which means the `current` method can be called. virtual bool isValid() = 0; + + /// Return the current element. virtual RelativePathWithMetadataPtr current() = 0; - virtual RelativePathsWithMetadata currentBatch() = 0; + + /// This will initiate prefetching the next batch in background, so it can be obtained faster when needed. virtual std::optional getCurrentBatchAndScheduleNext() = 0; + + /// Returns the number of elements in the batches that were fetched so far. virtual size_t getAccumulatedSize() const = 0; virtual ~IObjectStorageIterator() = default; + +private: + /// Skips all the remaining elements in the current batch (if any), + /// and moves the iterator to the first element of the next batch, + /// or, if there is no more batches, the iterator becomes invalid. + /// If the iterator not isValid, the behavior is undefined. + virtual void nextBatch() = 0; + + /// Return the current batch of elements. + /// It is unspecified how batches are formed. + /// But this method can be used for more efficient processing. 
+ virtual RelativePathsWithMetadata currentBatch() = 0; }; using ObjectStorageIteratorPtr = std::shared_ptr; @@ -25,6 +44,7 @@ using ObjectStorageIteratorPtr = std::shared_ptr; class ObjectStorageIteratorFromList : public IObjectStorageIterator { public: + /// Everything is represented by just a single batch. explicit ObjectStorageIteratorFromList(RelativePathsWithMetadata && batch_) : batch(std::move(batch_)) , batch_iterator(batch.begin()) {} diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp index 0420de0f8dd..2d2e8cd2c1a 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp @@ -36,40 +36,47 @@ void IObjectStorageIteratorAsync::deactivate() void IObjectStorageIteratorAsync::nextBatch() { std::lock_guard lock(mutex); - if (is_finished) + + if (!has_next_batch) { current_batch.clear(); current_batch_iterator = current_batch.begin(); + is_finished = true; + return; } - else - { - if (!is_initialized) - { - outcome_future = scheduleBatch(); - is_initialized = true; - } + if (!is_initialized) + { + outcome_future = scheduleBatch(); + is_initialized = true; + } + + try + { chassert(outcome_future.valid()); - BatchAndHasNext result; - try - { - result = outcome_future.get(); - } - catch (...) - { - is_finished = true; - throw; - } + BatchAndHasNext result = outcome_future.get(); current_batch = std::move(result.batch); current_batch_iterator = current_batch.begin(); - accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed); - - if (result.has_next) - outcome_future = scheduleBatch(); - else + if (current_batch.empty()) + { is_finished = true; + has_next_batch = false; + } + else + { + accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed); + + has_next_batch = result.has_next; + if (has_next_batch) + outcome_future = scheduleBatch(); + } + } + catch (...) 
+ { + has_next_batch = false; + throw; } } @@ -77,10 +84,12 @@ void IObjectStorageIteratorAsync::next() { std::lock_guard lock(mutex); + if (is_finished) + return; + + ++current_batch_iterator; if (current_batch_iterator == current_batch.end()) nextBatch(); - else - ++current_batch_iterator; } std::future IObjectStorageIteratorAsync::scheduleBatch() @@ -95,35 +104,39 @@ std::future IObjectStorageIterator bool IObjectStorageIteratorAsync::isValid() { + std::lock_guard lock(mutex); + if (!is_initialized) nextBatch(); - std::lock_guard lock(mutex); - return current_batch_iterator != current_batch.end(); + return !is_finished; } RelativePathWithMetadataPtr IObjectStorageIteratorAsync::current() { + std::lock_guard lock(mutex); + if (!isValid()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator"); - std::lock_guard lock(mutex); return *current_batch_iterator; } RelativePathsWithMetadata IObjectStorageIteratorAsync::currentBatch() { + std::lock_guard lock(mutex); + if (!isValid()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator"); - std::lock_guard lock(mutex); return current_batch; } std::optional IObjectStorageIteratorAsync::getCurrentBatchAndScheduleNext() { std::lock_guard lock(mutex); + if (!is_initialized) nextBatch(); diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h index cb4818d01ae..01371415124 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h @@ -35,7 +35,7 @@ public: void deactivate(); protected: - + /// This method fetches the next batch, and returns true if there are more batches after it. 
virtual bool getBatchAndCheckNext(RelativePathsWithMetadata & batch) = 0; struct BatchAndHasNext @@ -48,6 +48,7 @@ protected: bool is_initialized{false}; bool is_finished{false}; + bool has_next_batch{true}; bool deactivated{false}; mutable std::recursive_mutex mutex; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index ae719f5cde4..a6781e0ab35 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -168,7 +168,7 @@ private: bool S3ObjectStorage::exists(const StoredObject & object) const { auto settings_ptr = s3_settings.get(); - return S3::objectExists(*client.get(), uri.bucket, object.remote_path, {}, settings_ptr->request_settings); + return S3::objectExists(*client.get(), uri.bucket, object.remote_path, {}); } std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT @@ -258,13 +258,15 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN if (mode != WriteMode::Rewrite) throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 doesn't support append to files"); - S3Settings::RequestSettings request_settings = s3_settings.get()->request_settings; + S3::RequestSettings request_settings = s3_settings.get()->request_settings; /// NOTE: For background operations settings are not propagated from session or query. They are taken from /// default user's .xml config. It's obscure and unclear behavior. For them it's always better /// to rely on settings from disk. 
- if (auto query_context = CurrentThread::getQueryContext(); query_context && !query_context->isBackgroundOperationContext()) + if (auto query_context = CurrentThread::getQueryContext(); + query_context && !query_context->isBackgroundOperationContext()) { - request_settings.updateFromSettingsIfChanged(query_context->getSettingsRef()); + const auto & settings = query_context->getSettingsRef(); + request_settings.updateFromSettings(settings, /* if_changed */true, settings.s3_validate_request_settings); } ThreadPoolCallbackRunnerUnsafe scheduler; @@ -291,6 +293,8 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const { auto settings_ptr = s3_settings.get(); + if (!max_keys) + max_keys = settings_ptr->list_object_keys_size; return std::make_shared(uri.bucket, path_prefix, client.get(), max_keys); } @@ -382,6 +386,7 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e { std::vector current_chunk; String keys; + size_t first_position = current_position; for (; current_position < objects.size() && current_chunk.size() < chunk_size_limit; ++current_position) { Aws::S3::Model::ObjectIdentifier obj; @@ -407,9 +412,9 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e { const auto * outcome_error = outcome.IsSuccess() ? 
nullptr : &outcome.GetError(); auto time_now = std::chrono::system_clock::now(); - for (const auto & object : objects) + for (size_t i = first_position; i < current_position; ++i) blob_storage_log->addEvent(BlobStorageLogElement::EventType::Delete, - uri.bucket, object.remote_path, object.local_path, object.bytes_size, + uri.bucket, objects[i].remote_path, objects[i].local_path, objects[i].bytes_size, outcome_error, time_now); } @@ -443,8 +448,7 @@ std::optional S3ObjectStorage::tryGetObjectMetadata(const std::s { auto settings_ptr = s3_settings.get(); auto object_info = S3::getObjectInfo( - *client.get(), uri.bucket, path, {}, settings_ptr->request_settings, - /* with_metadata= */ true, /* throw_on_error= */ false); + *client.get(), uri.bucket, path, {}, /* with_metadata= */ true, /* throw_on_error= */ false); if (object_info.size == 0 && object_info.last_modification_time == 0 && object_info.metadata.empty()) return {}; @@ -463,7 +467,7 @@ ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) cons S3::ObjectInfo object_info; try { - object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true); + object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, /* with_metadata= */ true); } catch (DB::Exception & e) { @@ -492,7 +496,7 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT { auto current_client = dest_s3->client.get(); auto settings_ptr = s3_settings.get(); - auto size = S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {}, settings_ptr->request_settings); + auto size = S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {}); auto scheduler = threadPoolCallbackRunnerUnsafe(getThreadPoolWriter(), "S3ObjStor_copy"); try @@ -536,7 +540,7 @@ void S3ObjectStorage::copyObject( // NOLINT { auto current_client = client.get(); auto settings_ptr = s3_settings.get(); - auto size = 
S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {}, settings_ptr->request_settings); + auto size = S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {}); auto scheduler = threadPoolCallbackRunnerUnsafe(getThreadPoolWriter(), "S3ObjStor_copy"); copyS3File( @@ -581,19 +585,22 @@ void S3ObjectStorage::applyNewSettings( ContextPtr context, const ApplyNewSettingsOptions & options) { - auto settings_from_config = getSettings(config, config_prefix, context, context->getSettingsRef().s3_validate_request_settings); + auto settings_from_config = getSettings(config, config_prefix, context, uri.uri_str, context->getSettingsRef().s3_validate_request_settings); auto modified_settings = std::make_unique(*s3_settings.get()); - modified_settings->auth_settings.updateFrom(settings_from_config->auth_settings); - modified_settings->request_settings = settings_from_config->request_settings; + modified_settings->auth_settings.updateIfChanged(settings_from_config->auth_settings); + modified_settings->request_settings.updateIfChanged(settings_from_config->request_settings); if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString(), context->getUserName())) - modified_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); + { + modified_settings->auth_settings.updateIfChanged(endpoint_settings->auth_settings); + modified_settings->request_settings.updateIfChanged(endpoint_settings->request_settings); + } auto current_settings = s3_settings.get(); if (options.allow_client_change && (current_settings->auth_settings.hasUpdates(modified_settings->auth_settings) || for_disk_s3)) { - auto new_client = getClient(config, config_prefix, context, *modified_settings, for_disk_s3, &uri); + auto new_client = getClient(uri, *modified_settings, context, for_disk_s3); client.set(std::move(new_client)); } s3_settings.set(std::move(modified_settings)); @@ -605,8 +612,9 @@ std::unique_ptr 
S3ObjectStorage::cloneObjectStorage( const std::string & config_prefix, ContextPtr context) { - auto new_s3_settings = getSettings(config, config_prefix, context); - auto new_client = getClient(config, config_prefix, context, *new_s3_settings, true); + const auto & settings = context->getSettingsRef(); + auto new_s3_settings = getSettings(config, config_prefix, context, uri.uri_str, settings.s3_validate_request_settings); + auto new_client = getClient(uri, *new_s3_settings, context, for_disk_s3); auto new_uri{uri}; new_uri.bucket = new_namespace; @@ -628,6 +636,10 @@ std::shared_ptr S3ObjectStorage::getS3StorageClient() return client.get(); } +std::shared_ptr S3ObjectStorage::tryGetS3StorageClient() +{ + return client.get(); +} } #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 6eacf3a1eee..cbe004bc298 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include @@ -20,7 +20,7 @@ struct S3ObjectStorageSettings S3ObjectStorageSettings() = default; S3ObjectStorageSettings( - const S3Settings::RequestSettings & request_settings_, + const S3::RequestSettings & request_settings_, const S3::AuthSettings & auth_settings_, uint64_t min_bytes_for_seek_, int32_t list_object_keys_size_, @@ -34,7 +34,7 @@ struct S3ObjectStorageSettings , read_only(read_only_) {} - S3Settings::RequestSettings request_settings; + S3::RequestSettings request_settings; S3::AuthSettings auth_settings; uint64_t min_bytes_for_seek; @@ -67,7 +67,7 @@ private: } public: - template + template explicit S3ObjectStorage(std::unique_ptr && client_, Args && ...args) : S3ObjectStorage("S3ObjectStorage", std::move(client_), std::forward(args)...) 
{ @@ -169,6 +169,7 @@ public: bool isReadOnly() const override { return s3_settings.get()->read_only; } std::shared_ptr getS3StorageClient() override; + std::shared_ptr tryGetS3StorageClient() override; private: void setNewSettings(std::unique_ptr && s3_settings_); diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 139472a8b01..62df98f51e6 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -18,18 +19,12 @@ #include #include -#include +#include #include #include -#include namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace ErrorCodes { extern const int NO_ELEMENTS_IN_CONFIG; @@ -39,11 +34,16 @@ std::unique_ptr getSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, + const std::string & endpoint, bool validate_settings) { - const Settings & settings = context->getSettingsRef(); - auto request_settings = S3Settings::RequestSettings(config, config_prefix, settings, "s3_", validate_settings); - auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config); + const auto & settings = context->getSettingsRef(); + + auto auth_settings = S3::AuthSettings(config, settings, config_prefix); + auto request_settings = S3::RequestSettings(config, settings, config_prefix, "s3_", validate_settings); + + request_settings.proxy_resolver = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat( + ProxyConfiguration::protocolFromString(S3::URI(endpoint).uri.getScheme()), config_prefix, config); return std::make_unique( request_settings, @@ -55,38 +55,33 @@ std::unique_ptr getSettings( } std::unique_ptr getClient( - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - ContextPtr context, + const std::string & endpoint, const S3ObjectStorageSettings & 
settings, - bool for_disk_s3, - const S3::URI * url_) + ContextPtr context, + bool for_disk_s3) +{ + auto url = S3::URI(endpoint); + if (!url.key.ends_with('/')) + url.key.push_back('/'); + return getClient(url, settings, context, for_disk_s3); +} + +std::unique_ptr getClient( + const S3::URI & url, + const S3ObjectStorageSettings & settings, + ContextPtr context, + bool for_disk_s3) { const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); - const Settings & local_settings = context->getSettingsRef(); - const auto & auth_settings = settings.auth_settings; const auto & request_settings = settings.request_settings; - S3::URI url; - if (for_disk_s3) - { - String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); - url = S3::URI(endpoint); - if (!url.key.ends_with('/')) - url.key.push_back('/'); - } - else - { - if (!url_) - throw Exception(ErrorCodes::LOGICAL_ERROR, "URL not passed"); - url = *url_; - } const bool is_s3_express_bucket = S3::isS3ExpressEndpoint(url.endpoint); - if (is_s3_express_bucket && !config.has(config_prefix + ".region")) + if (is_s3_express_bucket && auth_settings.region.value.empty()) { throw Exception( - ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets ({})", config_prefix); + ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "Region should be explicitly specified for directory buckets"); } S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( @@ -96,49 +91,40 @@ std::unique_ptr getClient( static_cast(global_settings.s3_retry_attempts), global_settings.enable_s3_requests_logging, for_disk_s3, - settings.request_settings.get_request_throttler, - settings.request_settings.put_request_throttler, + request_settings.get_request_throttler, + request_settings.put_request_throttler, url.uri.getScheme()); - client_configuration.connectTimeoutMs = config.getUInt64(config_prefix + 
".connect_timeout_ms", local_settings.s3_connect_timeout_ms.value); - client_configuration.requestTimeoutMs = config.getUInt64(config_prefix + ".request_timeout_ms", local_settings.s3_request_timeout_ms.value); - client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", static_cast(request_settings.max_connections)); - client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", S3::DEFAULT_KEEP_ALIVE_TIMEOUT); - client_configuration.http_keep_alive_max_requests = config.getUInt(config_prefix + ".http_keep_alive_max_requests", S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS); + client_configuration.connectTimeoutMs = auth_settings.connect_timeout_ms; + client_configuration.requestTimeoutMs = auth_settings.request_timeout_ms; + client_configuration.maxConnections = static_cast(auth_settings.max_connections); + client_configuration.http_keep_alive_timeout = auth_settings.http_keep_alive_timeout; + client_configuration.http_keep_alive_max_requests = auth_settings.http_keep_alive_max_requests; client_configuration.endpointOverride = url.endpoint; - client_configuration.s3_use_adaptive_timeouts = config.getBool( - config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts); + client_configuration.s3_use_adaptive_timeouts = auth_settings.use_adaptive_timeouts; - if (for_disk_s3) + if (request_settings.proxy_resolver) { /* * Override proxy configuration for backwards compatibility with old configuration format. 
* */ - if (auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat( - ProxyConfiguration::protocolFromString(url.uri.getScheme()), config_prefix, config)) - { - client_configuration.per_request_configuration - = [proxy_config]() { return proxy_config->resolve(); }; - client_configuration.error_report - = [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); }; - } + client_configuration.per_request_configuration = [=]() { return request_settings.proxy_resolver->resolve(); }; + client_configuration.error_report = [=](const auto & request_config) { request_settings.proxy_resolver->errorReport(request_config); }; } - S3::ServerSideEncryptionKMSConfig sse_kms_config = S3::getSSEKMSConfig(config_prefix, config); S3::ClientSettings client_settings{ .use_virtual_addressing = url.is_virtual_hosted_style, - .disable_checksum = local_settings.s3_disable_checksum, - .gcs_issue_compose_request = config.getBool("s3.gcs_issue_compose_request", false), - .is_s3express_bucket = is_s3_express_bucket, + .disable_checksum = auth_settings.disable_checksum, + .gcs_issue_compose_request = auth_settings.gcs_issue_compose_request, }; auto credentials_configuration = S3::CredentialsConfiguration { - auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)), - auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), - auth_settings.expiration_window_seconds.value_or(context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), - auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), + auth_settings.use_environment_credentials, + auth_settings.use_insecure_imds_request, + auth_settings.expiration_window_seconds, + auth_settings.no_sign_request, }; return S3::ClientFactory::instance().create( @@ -147,7 +133,7 
@@ std::unique_ptr getClient( auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.server_side_encryption_customer_key_base64, - std::move(sse_kms_config), + auth_settings.server_side_encryption_kms_config, auth_settings.headers, credentials_configuration, auth_settings.session_token); diff --git a/src/Disks/ObjectStorages/S3/diskSettings.h b/src/Disks/ObjectStorages/S3/diskSettings.h index 11ac64ce913..aa427bee41a 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.h +++ b/src/Disks/ObjectStorages/S3/diskSettings.h @@ -18,15 +18,20 @@ std::unique_ptr getSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, - bool validate_settings = true); + const std::string & endpoint, + bool validate_settings); std::unique_ptr getClient( - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - ContextPtr context, + const std::string & endpoint, const S3ObjectStorageSettings & settings, - bool for_disk_s3, - const S3::URI * url_ = nullptr); + ContextPtr context, + bool for_disk_s3); + +std::unique_ptr getClient( + const S3::URI & url_, + const S3ObjectStorageSettings & settings, + ContextPtr context, + bool for_disk_s3); } diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index 7b2762613b6..fd43f31a009 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -47,7 +47,7 @@ namespace auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { auto disk = DiskFactory::instance().create( - disk_name, *config, "", context, disks_map, /* attach */attach, /* custom_disk */true); + disk_name, *config, /* config_path */"", context, disks_map, /* attach */attach, /* custom_disk */true); /// Mark that disk can be used without storage policy. 
disk->markDiskAsCustom(); return disk; diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 9577ca2a8df..36d16d8d154 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -303,7 +303,7 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet auto type = tryInferDataTypeForSingleField(data, format_settings); /// If we couldn't infer any type or it's a number and csv.try_infer_numbers_from_strings = 0, we determine it as a string. - if (!type || (isNumber(type) && !format_settings.csv.try_infer_numbers_from_strings)) + if (!type || (format_settings.csv.try_infer_strings_from_quoted_tuples && isTuple(type)) || (!format_settings.csv.try_infer_numbers_from_strings && isNumber(type))) return std::make_shared(); return type; diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index a7883919c4c..79c2e6b4890 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -77,6 +77,8 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.avro.output_rows_in_file = settings.output_format_avro_rows_in_file; format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes; format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes; + format_settings.csv.serialize_tuple_into_separate_columns = settings.output_format_csv_serialize_tuple_into_separate_columns; + format_settings.csv.deserialize_separate_columns_into_tuple = settings.input_format_csv_deserialize_separate_columns_into_tuple; format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line; format_settings.csv.allow_cr_end_of_line = settings.input_format_csv_allow_cr_end_of_line; format_settings.csv.delimiter = settings.format_csv_delimiter; @@ -94,6 +96,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se 
format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns; format_settings.csv.use_default_on_bad_values = settings.input_format_csv_use_default_on_bad_values; format_settings.csv.try_infer_numbers_from_strings = settings.input_format_csv_try_infer_numbers_from_strings; + format_settings.csv.try_infer_strings_from_quoted_tuples = settings.input_format_csv_try_infer_strings_from_quoted_tuples; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; @@ -146,6 +149,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects; format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence; format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields; + format_settings.json.ignore_key_case = settings.input_format_json_ignore_key_case; format_settings.null_as_default = settings.input_format_null_as_default; format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; @@ -161,12 +165,14 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string; format_settings.parquet.output_fixed_string_as_fixed_byte_array = settings.output_format_parquet_fixed_string_as_fixed_byte_array; format_settings.parquet.max_block_size = settings.input_format_parquet_max_block_size; + 
format_settings.parquet.prefer_block_bytes = settings.input_format_parquet_prefer_block_bytes; format_settings.parquet.output_compression_method = settings.output_format_parquet_compression_method; format_settings.parquet.output_compliant_nested_types = settings.output_format_parquet_compliant_nested_types; format_settings.parquet.use_custom_encoder = settings.output_format_parquet_use_custom_encoder; format_settings.parquet.parallel_encoding = settings.output_format_parquet_parallel_encoding; format_settings.parquet.data_page_size = settings.output_format_parquet_data_page_size; format_settings.parquet.write_batch_size = settings.output_format_parquet_batch_size; + format_settings.parquet.write_page_index = settings.output_format_parquet_write_page_index; format_settings.parquet.local_read_min_bytes_for_seek = settings.input_format_parquet_local_file_min_bytes_for_seek; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; @@ -177,6 +183,8 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.pretty.highlight_digit_groups = settings.output_format_pretty_highlight_digit_groups; format_settings.pretty.output_format_pretty_row_numbers = settings.output_format_pretty_row_numbers; format_settings.pretty.output_format_pretty_single_large_number_tip_threshold = settings.output_format_pretty_single_large_number_tip_threshold; + format_settings.pretty.output_format_pretty_display_footer_column_names = settings.output_format_pretty_display_footer_column_names; + format_settings.pretty.output_format_pretty_display_footer_column_names_min_rows = settings.output_format_pretty_display_footer_column_names_min_rows; format_settings.protobuf.input_flatten_google_wrappers = settings.input_format_protobuf_flatten_google_wrappers; 
format_settings.protobuf.output_nullables_with_google_wrappers = settings.output_format_protobuf_nullables_with_google_wrappers; format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index b296928e4d4..8ac783a1d86 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -153,6 +153,8 @@ struct FormatSettings char delimiter = ','; bool allow_single_quotes = true; bool allow_double_quotes = true; + bool serialize_tuple_into_separate_columns = true; + bool deserialize_separate_columns_into_tuple = true; bool empty_as_default = false; bool crlf_end_of_line = false; bool allow_cr_end_of_line = false; @@ -170,6 +172,7 @@ struct FormatSettings bool allow_variable_number_of_columns = false; bool use_default_on_bad_values = false; bool try_infer_numbers_from_strings = true; + bool try_infer_strings_from_quoted_tuples = true; } csv{}; struct HiveText @@ -225,6 +228,7 @@ struct FormatSettings bool infer_incomplete_types_as_strings = true; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; + bool ignore_key_case = false; } json{}; struct @@ -265,12 +269,14 @@ struct FormatSettings bool preserve_order = false; bool use_custom_encoder = true; bool parallel_encoding = true; - UInt64 max_block_size = 8192; + UInt64 max_block_size = DEFAULT_BLOCK_SIZE; + size_t prefer_block_bytes = DEFAULT_BLOCK_SIZE * 256; ParquetVersion output_version; ParquetCompression output_compression_method = ParquetCompression::SNAPPY; bool output_compliant_nested_types = true; size_t data_page_size = 1024 * 1024; size_t write_batch_size = 1024; + bool write_page_index = false; size_t local_read_min_bytes_for_seek = 8192; } parquet{}; @@ -285,6 +291,8 @@ struct FormatSettings bool output_format_pretty_row_numbers = false; UInt64 
output_format_pretty_single_large_number_tip_threshold = 1'000'000; + UInt64 output_format_pretty_display_footer_column_names = 1; + UInt64 output_format_pretty_display_footer_column_names_min_rows = 50; enum class Charset : uint8_t { diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 6cbcae2bebe..31faea2e13e 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -879,11 +879,11 @@ namespace } template - bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings) + bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings, bool & has_fractional) { if (is_json || settings.try_infer_exponent_floats) - return tryReadFloatText(value, buf); - return tryReadFloatTextNoExponent(value, buf); + return tryReadFloatTextExt(value, buf, has_fractional); + return tryReadFloatTextExtNoExponent(value, buf, has_fractional); } template @@ -893,46 +893,31 @@ namespace return nullptr; Float64 tmp_float; + bool has_fractional; if (settings.try_infer_integers) { /// If we read from String, we can do it in a more efficient way. if (auto * string_buf = dynamic_cast(&buf)) { /// Remember the pointer to the start of the number to rollback to it. - char * number_start = buf.position(); - Int64 tmp_int; - bool read_int = tryReadIntText(tmp_int, buf); - /// If we reached eof, it cannot be float (it requires no less data than integer) - if (buf.eof()) - return read_int ? std::make_shared() : nullptr; - - char * int_end = buf.position(); /// We can safely get back to the start of the number, because we read from a string and we didn't reach eof. - buf.position() = number_start; + char * number_start = buf.position(); - bool read_uint = false; - char * uint_end = nullptr; - /// In case of Int64 overflow we can try to infer UInt64. 
- if (!read_int) - { - UInt64 tmp_uint; - read_uint = tryReadIntText(tmp_uint, buf); - /// If we reached eof, it cannot be float (it requires no less data than integer) - if (buf.eof()) - return read_uint ? std::make_shared() : nullptr; - - uint_end = buf.position(); - buf.position() = number_start; - } - - if (tryReadFloat(tmp_float, buf, settings)) - { - if (read_int && buf.position() == int_end) - return std::make_shared(); - if (read_uint && buf.position() == uint_end) - return std::make_shared(); + /// NOTE: it may break parsing of tryReadFloat() != tryReadIntText() + parsing of '.'/'e' + /// But, for now it is true + if (tryReadFloat(tmp_float, buf, settings, has_fractional) && has_fractional) return std::make_shared(); - } + + Int64 tmp_int; + buf.position() = number_start; + if (tryReadIntText(tmp_int, buf)) + return std::make_shared(); + + /// In case of Int64 overflow we can try to infer UInt64. + UInt64 tmp_uint; + buf.position() = number_start; + if (tryReadIntText(tmp_uint, buf)) + return std::make_shared(); return nullptr; } @@ -942,36 +927,22 @@ namespace /// and then as float. PeekableReadBuffer peekable_buf(buf); PeekableReadBufferCheckpoint checkpoint(peekable_buf); - Int64 tmp_int; - bool read_int = tryReadIntText(tmp_int, peekable_buf); - auto * int_end = peekable_buf.position(); - peekable_buf.rollbackToCheckpoint(true); - bool read_uint = false; - char * uint_end = nullptr; - /// In case of Int64 overflow we can try to infer UInt64. - if (!read_int) - { - PeekableReadBufferCheckpoint new_checkpoint(peekable_buf); - UInt64 tmp_uint; - read_uint = tryReadIntText(tmp_uint, peekable_buf); - uint_end = peekable_buf.position(); - peekable_buf.rollbackToCheckpoint(true); - } - - if (tryReadFloat(tmp_float, peekable_buf, settings)) - { - /// Float parsing reads no fewer bytes than integer parsing, - /// so position of the buffer is either the same, or further. - /// If it's the same, then it's integer. 
- if (read_int && peekable_buf.position() == int_end) - return std::make_shared(); - if (read_uint && peekable_buf.position() == uint_end) - return std::make_shared(); + if (tryReadFloat(tmp_float, peekable_buf, settings, has_fractional) && has_fractional) return std::make_shared(); - } + peekable_buf.rollbackToCheckpoint(/* drop= */ false); + + Int64 tmp_int; + if (tryReadIntText(tmp_int, peekable_buf)) + return std::make_shared(); + peekable_buf.rollbackToCheckpoint(/* drop= */ true); + + /// In case of Int64 overflow we can try to infer UInt64. + UInt64 tmp_uint; + if (tryReadIntText(tmp_uint, peekable_buf)) + return std::make_shared(); } - else if (tryReadFloat(tmp_float, buf, settings)) + else if (tryReadFloat(tmp_float, buf, settings, has_fractional)) { return std::make_shared(); } @@ -1004,7 +975,8 @@ namespace buf.position() = buf.buffer().begin(); Float64 tmp; - if (tryReadFloat(tmp, buf, settings) && buf.eof()) + bool has_fractional; + if (tryReadFloat(tmp, buf, settings, has_fractional) && buf.eof()) return std::make_shared(); return nullptr; diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index c52b00150ec..7c90f83569a 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -3,35 +3,9 @@ add_subdirectory(divide) include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_functions .) 
-set(DBMS_FUNCTIONS - IFunction.cpp - FunctionFactory.cpp - FunctionHelpers.cpp - extractTimeZoneFromFunctionArguments.cpp - FunctionsLogical.cpp - if.cpp - multiIf.cpp - multiMatchAny.cpp - checkHyperscanRegexp.cpp - array/has.cpp - CastOverloadResolver.cpp - # Provides dependency for cast - createFunctionBaseCast() - FunctionsConversion.cpp -) -extract_into_parent_list(clickhouse_functions_sources dbms_sources ${DBMS_FUNCTIONS}) -extract_into_parent_list(clickhouse_functions_headers dbms_headers - IFunction.h - FunctionFactory.h - FunctionHelpers.h - extractTimeZoneFromFunctionArguments.h - FunctionsLogical.h - CastOverloadResolver.h -) - add_library(clickhouse_functions_obj OBJECT ${clickhouse_functions_headers} ${clickhouse_functions_sources}) if (OMIT_HEAVY_DEBUG_SYMBOLS) target_compile_options(clickhouse_functions_obj PRIVATE "-g0") - set_source_files_properties(${DBMS_FUNCTIONS} DIRECTORY .. PROPERTIES COMPILE_FLAGS "-g0") endif() list (APPEND OBJECT_LIBS $) diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index 3906563a254..083179c3ca8 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -12,7 +12,7 @@ # include # include -# include +# include namespace DB { @@ -22,36 +22,125 @@ namespace ErrorCodes extern const int INCORRECT_DATA; } +enum class Base64Variant : uint8_t +{ + Normal, + URL +}; + +inline std::string preprocessBase64URL(std::string_view src) +{ + std::string padded_src; + padded_src.reserve(src.size() + 3); + + // Do symbol substitution as described in https://datatracker.ietf.org/doc/html/rfc4648#section-5 + for (auto s : src) + { + switch (s) + { + case '_': + padded_src += '/'; + break; + case '-': + padded_src += '+'; + break; + default: + padded_src += s; + break; + } + } + + /// Insert padding to please aklomp library + size_t remainder = src.size() % 4; + switch (remainder) + { + case 0: + break; // no padding needed + case 1: + 
padded_src.append("==="); // this case is impossible to occur with valid base64-URL encoded input, however, we'll insert padding anyway + break; + case 2: + padded_src.append("=="); // two bytes padding + break; + default: // remainder == 3 + padded_src.append("="); // one byte padding + break; + } + + return padded_src; +} + +inline size_t postprocessBase64URL(UInt8 * dst, size_t out_len) +{ + // Do symbol substitution as described in https://datatracker.ietf.org/doc/html/rfc4648#section-5 + for (size_t i = 0; i < out_len; ++i) + { + switch (dst[i]) + { + case '/': + dst[i] = '_'; + break; + case '+': + dst[i] = '-'; + break; + case '=': // stop when padding is detected + return i; + default: + break; + } + } + return out_len; +} + +template struct Base64Encode { - static constexpr auto name = "base64Encode"; + static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Encode" : "base64URLEncode"; static size_t getBufferSize(size_t string_length, size_t string_count) { return ((string_length - string_count) / 3 + string_count) * 4 + string_count; } - static size_t perform(const std::span src, UInt8 * dst) + static size_t perform(std::string_view src, UInt8 * dst) { size_t outlen = 0; - base64_encode(reinterpret_cast(src.data()), src.size(), reinterpret_cast(dst), &outlen, 0); + base64_encode(src.data(), src.size(), reinterpret_cast(dst), &outlen, 0); + + /// Base64 library is using AVX-512 with some shuffle operations. + /// Memory sanitizer doesn't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. + __msan_unpoison(dst, outlen); + + if constexpr (variant == Base64Variant::URL) + outlen = postprocessBase64URL(dst, outlen); + return outlen; } }; +template struct Base64Decode { - static constexpr auto name = "base64Decode"; + static constexpr auto name = (variant == Base64Variant::Normal) ? 
"base64Decode" : "base64URLDecode"; static size_t getBufferSize(size_t string_length, size_t string_count) { return ((string_length - string_count) / 4 + string_count) * 3 + string_count; } - static size_t perform(const std::span src, UInt8 * dst) + static size_t perform(std::string_view src, UInt8 * dst) { + int rc; size_t outlen = 0; - int rc = base64_decode(reinterpret_cast(src.data()), src.size(), reinterpret_cast(dst), &outlen, 0); + if constexpr (variant == Base64Variant::URL) + { + std::string src_padded = preprocessBase64URL(src); + rc = base64_decode(src_padded.data(), src_padded.size(), reinterpret_cast(dst), &outlen, 0); + } + else + { + rc = base64_decode(src.data(), src.size(), reinterpret_cast(dst), &outlen, 0); + } if (rc != 1) throw Exception( @@ -64,19 +153,29 @@ struct Base64Decode } }; +template struct TryBase64Decode { - static constexpr auto name = "tryBase64Decode"; + static constexpr auto name = (variant == Base64Variant::Normal) ? "tryBase64Decode" : "tryBase64URLDecode"; static size_t getBufferSize(size_t string_length, size_t string_count) { - return Base64Decode::getBufferSize(string_length, string_count); + return Base64Decode::getBufferSize(string_length, string_count); } - static size_t perform(const std::span src, UInt8 * dst) + static size_t perform(std::string_view src, UInt8 * dst) { + int rc; size_t outlen = 0; - int rc = base64_decode(reinterpret_cast(src.data()), src.size(), reinterpret_cast(dst), &outlen, 0); + if constexpr (variant == Base64Variant::URL) + { + std::string src_padded = preprocessBase64URL(src); + rc = base64_decode(src_padded.data(), src_padded.size(), reinterpret_cast(dst), &outlen, 0); + } + else + { + rc = base64_decode(src.data(), src.size(), reinterpret_cast(dst), &outlen, 0); + } if (rc != 1) outlen = 0; @@ -139,7 +238,7 @@ private: auto * dst = dst_chars.data(); auto * dst_pos = dst; - const auto * src = src_chars.data(); + const auto * src = reinterpret_cast(src_chars.data()); size_t src_offset_prev = 
0; for (size_t row = 0; row < src_row_count; ++row) @@ -147,10 +246,6 @@ private: const size_t src_length = src_offsets[row] - src_offset_prev - 1; const size_t outlen = Func::perform({src, src_length}, dst_pos); - /// Base64 library is using AVX-512 with some shuffle operations. - /// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. - __msan_unpoison(dst_pos, outlen); - src += src_length + 1; dst_pos += outlen; *dst_pos = '\0'; @@ -179,16 +274,12 @@ private: auto * dst = dst_chars.data(); auto * dst_pos = dst; - const auto * src = src_chars.data(); + const auto * src = reinterpret_cast(src_chars.data()); for (size_t row = 0; row < src_row_count; ++row) { const auto outlen = Func::perform({src, src_n}, dst_pos); - /// Base64 library is using AVX-512 with some shuffle operations. - /// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. 
- __msan_unpoison(dst_pos, outlen); - src += src_n; dst_pos += outlen; *dst_pos = '\0'; diff --git a/src/Functions/FunctionBitTestMany.h b/src/Functions/FunctionBitTestMany.h index 71e94b1e71d..950e4ab4ea8 100644 --- a/src/Functions/FunctionBitTestMany.h +++ b/src/Functions/FunctionBitTestMany.h @@ -16,6 +16,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int PARAMETER_OUT_OF_BOUND; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; } @@ -146,6 +147,9 @@ private: const auto pos = pos_col_const->getUInt(0); if (pos < 8 * sizeof(ValueType)) mask = mask | (ValueType(1) << pos); + else + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument {} is out of bounds for number", static_cast(pos)); } else { @@ -186,13 +190,20 @@ private: for (const auto i : collections::range(0, mask.size())) if (pos[i] < 8 * sizeof(ValueType)) mask[i] = mask[i] | (ValueType(1) << pos[i]); + else + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument {} is out of bounds for number", static_cast(pos[i])); return true; } else if (const auto pos_col_const = checkAndGetColumnConst>(pos_col_untyped)) { const auto & pos = pos_col_const->template getValue(); - const auto new_mask = pos < 8 * sizeof(ValueType) ? 
ValueType(1) << pos : 0; + if (pos >= 8 * sizeof(ValueType)) + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument {} is out of bounds for number", static_cast(pos)); + + const auto new_mask = ValueType(1) << pos; for (const auto i : collections::range(0, mask.size())) mask[i] = mask[i] | new_mask; diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index c7a0c3c58ca..593646240ca 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -314,7 +314,7 @@ void checkFunctionArgumentSizes(const ColumnsWithTypeAndName & arguments, size_t if (current_size != input_rows_count) throw Exception( ErrorCodes::LOGICAL_ERROR, - "Expected the argument nº#{} ('{}' of type {}) to have {} rows, but it has {}", + "Expected the argument №{} ('{}' of type {}) to have {} rows, but it has {}", i + 1, arguments[i].name, arguments[i].type->getName(), diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp index 085c4db3f57..0a2859fe864 100644 --- a/src/Functions/FunctionJoinGet.cpp +++ b/src/Functions/FunctionJoinGet.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 44d0b750af9..2a0b2f1d075 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -709,7 +709,7 @@ bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateL else return tryReadFloatTextFast(x, rb); } - else /*if constexpr (is_integer_v)*/ + else /*if constexpr (is_integral_v)*/ return tryReadIntText(x, rb); } @@ -814,6 +814,16 @@ enum class ConvertFromStringParsingMode : uint8_t BestEffortUS }; +struct AccurateConvertStrategyAdditions +{ + UInt32 scale { 0 }; +}; + +struct AccurateOrNullConvertStrategyAdditions +{ + UInt32 scale { 0 }; +}; + template struct ConvertThroughParsing @@ -1020,7 +1030,13 @@ struct 
ConvertThroughParsing break; } } - parseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); + if constexpr (std::is_same_v) + { + if (!tryParseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing)) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse string to type {}", TypeName); + } + else + parseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); } while (false); } } @@ -1120,16 +1136,6 @@ struct ConvertThroughParsing /// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type. struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; }; -struct AccurateConvertStrategyAdditions -{ - UInt32 scale { 0 }; -}; - -struct AccurateOrNullConvertStrategyAdditions -{ - UInt32 scale { 0 }; -}; - enum class BehaviourOnErrorFromString : uint8_t { ConvertDefaultBehaviorTag, @@ -3174,8 +3180,11 @@ private: { TypeIndex from_type_index = from_type->getTypeId(); WhichDataType which(from_type_index); + TypeIndex to_type_index = to_type->getTypeId(); + WhichDataType to(to_type_index); bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull) && (which.isInt() || which.isUInt() || which.isFloat()); + can_apply_accurate_cast |= cast_type == CastType::accurate && which.isStringOrFixedString() && to.isNativeInteger(); FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; if (context) @@ -3260,6 +3269,20 @@ private: return true; } } + else if constexpr (IsDataTypeStringOrFixedString) + { + if constexpr (IsDataTypeNumber) + { + chassert(wrapper_cast_type == CastType::accurate); + result_column = ConvertImpl::execute( + arguments, + result_type, + input_rows_count, + BehaviourOnErrorFromString::ConvertDefaultBehaviorTag, + AccurateConvertStrategyAdditions()); + } + return true; + } return false; }); diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h 
index d2dac467bff..08e257de8ac 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -31,7 +31,6 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; extern const int ILLEGAL_COLUMN; @@ -40,26 +39,22 @@ namespace ErrorCodes } -/** Rounding Functions: - * round(x, N) - rounding to nearest (N = 0 by default). Use banker's rounding for floating point numbers. - * roundBankers(x, N) - rounding to nearest (N = 0 by default). Use banker's rounding for all numbers. - * floor(x, N) is the largest number <= x (N = 0 by default). - * ceil(x, N) is the smallest number >= x (N = 0 by default). - * trunc(x, N) - is the largest by absolute value number that is not greater than x by absolute value (N = 0 by default). - * - * The value of the parameter N (scale): - * - N > 0: round to the number with N decimal places after the decimal point - * - N < 0: round to an integer with N zero characters - * - N = 0: round to an integer - * - * Type of the result is the type of argument. - * For integer arguments, when passing negative scale, overflow can occur. - * In that case, the behavior is implementation specific. - */ +/// Rounding Functions: +/// - round(x, N) - rounding to nearest (N = 0 by default). Use banker's rounding for floating point numbers. +/// - roundBankers(x, N) - rounding to nearest (N = 0 by default). Use banker's rounding for all numbers. +/// - floor(x, N) is the largest number <= x (N = 0 by default). +/// - ceil(x, N) is the smallest number >= x (N = 0 by default). +/// - trunc(x, N) - is the largest by absolute value number that is not greater than x by absolute value (N = 0 by default). 
+/// The value of the parameter N (scale): +/// - N > 0: round to the number with N decimal places after the decimal point +/// - N < 0: round to an integer with N zero characters +/// - N = 0: round to an integer -/** This parameter controls the behavior of the rounding functions. - */ +/// Type of the result is the type of argument. +/// For integer arguments, when passing negative scale, overflow can occur. In that case, the behavior is undefined. + +/// Controls the behavior of the rounding functions. enum class ScaleMode : uint8_t { Positive, // round to a number with N decimal places after the decimal point @@ -75,7 +70,7 @@ enum class RoundingMode : uint8_t Ceil = _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC, Trunc = _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC, #else - Round = 8, /// Values are correspond to above just in case. + Round = 8, /// Values correspond to above values, just in case. Floor = 9, Ceil = 10, Trunc = 11, @@ -84,16 +79,21 @@ enum class RoundingMode : uint8_t enum class TieBreakingMode : uint8_t { - Auto, // use banker's rounding for floating point numbers, round up otherwise - Bankers, // use banker's rounding + Auto, /// banker's rounding for floating point numbers, round up otherwise + Bankers, /// banker's rounding +}; + +enum class Vectorize : uint8_t +{ + No, + Yes }; /// For N, no more than the number of digits in the largest type. using Scale = Int16; -/** Rounding functions for integer values. - */ +/// Rounding functions for integer values. 
template struct IntegerRoundingComputation { @@ -149,6 +149,8 @@ struct IntegerRoundingComputation return x; } } + + std::unreachable(); } static ALWAYS_INLINE T compute(T x, T scale) @@ -161,9 +163,12 @@ struct IntegerRoundingComputation case ScaleMode::Negative: return computeImpl(x, scale); } + + std::unreachable(); } - static ALWAYS_INLINE void compute(const T * __restrict in, size_t scale, T * __restrict out) requires std::integral + static ALWAYS_INLINE void compute(const T * __restrict in, size_t scale, T * __restrict out) + requires std::integral { if constexpr (sizeof(T) <= sizeof(scale) && scale_mode == ScaleMode::Negative) { @@ -176,20 +181,23 @@ struct IntegerRoundingComputation *out = compute(*in, static_cast(scale)); } - static ALWAYS_INLINE void compute(const T * __restrict in, T scale, T * __restrict out) requires(!std::integral) + static ALWAYS_INLINE void compute(const T * __restrict in, T scale, T * __restrict out) + requires(!std::integral) { *out = compute(*in, scale); } }; +template +class FloatRoundingComputationBase; + #ifdef __SSE4_1__ -template -class BaseFloatRoundingComputation; +/// Vectorized implementation for x86. template <> -class BaseFloatRoundingComputation +class FloatRoundingComputationBase { public: using ScalarType = Float32; @@ -210,7 +218,7 @@ public: }; template <> -class BaseFloatRoundingComputation +class FloatRoundingComputationBase { public: using ScalarType = Float64; @@ -230,9 +238,9 @@ public: } }; -#else +#endif -/// Implementation for ARM. Not vectorized. +/// Sequential implementation for ARM. Also used for scalar arguments. 
inline float roundWithMode(float x, RoundingMode mode) { @@ -243,6 +251,8 @@ inline float roundWithMode(float x, RoundingMode mode) case RoundingMode::Ceil: return ceilf(x); case RoundingMode::Trunc: return truncf(x); } + + std::unreachable(); } inline double roundWithMode(double x, RoundingMode mode) @@ -254,10 +264,12 @@ inline double roundWithMode(double x, RoundingMode mode) case RoundingMode::Ceil: return ceil(x); case RoundingMode::Trunc: return trunc(x); } + + std::unreachable(); } template -class BaseFloatRoundingComputation +class FloatRoundingComputationBase { public: using ScalarType = T; @@ -277,15 +289,13 @@ public: } }; -#endif - /** Implementation of low-level round-off functions for floating-point values. */ -template -class FloatRoundingComputation : public BaseFloatRoundingComputation +template +class FloatRoundingComputation : public FloatRoundingComputationBase { - using Base = BaseFloatRoundingComputation; + using Base = FloatRoundingComputationBase; public: static void compute(const T * __restrict in, const typename Base::VectorType & scale, T * __restrict out) @@ -317,15 +327,22 @@ struct FloatRoundingImpl private: static_assert(!is_decimal); - using Op = FloatRoundingComputation; - using Data = std::array; + template + using Op = FloatRoundingComputation; + using Data = std::array::data_count>; using ColumnType = ColumnVector; using Container = typename ColumnType::Container; public: static NO_INLINE void apply(const Container & in, size_t scale, Container & out) { - auto mm_scale = Op::prepare(scale); + auto mm_scale = Op<>::prepare(scale); const size_t data_count = std::tuple_size(); @@ -337,7 +354,7 @@ public: while (p_in < limit) { - Op::compute(p_in, mm_scale, p_out); + Op<>::compute(p_in, mm_scale, p_out); p_in += data_count; p_out += data_count; } @@ -350,10 +367,17 @@ public: size_t tail_size_bytes = (end_in - p_in) * sizeof(*p_in); memcpy(&tmp_src, p_in, tail_size_bytes); - Op::compute(reinterpret_cast(&tmp_src), mm_scale, 
reinterpret_cast(&tmp_dst)); + Op<>::compute(reinterpret_cast(&tmp_src), mm_scale, reinterpret_cast(&tmp_dst)); memcpy(p_out, &tmp_dst, tail_size_bytes); } } + + static void applyOne(T in, size_t scale, T& out) + { + using ScalarOp = Op; + auto s = ScalarOp::prepare(scale); + ScalarOp::compute(&in, s, &out); + } }; template @@ -409,6 +433,11 @@ public: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected 'scale' parameter passed to function"); } } + + static void applyOne(T in, size_t scale, T& out) + { + Op::compute(&in, scale, &out); + } }; @@ -444,11 +473,40 @@ public: memcpy(out.data(), in.data(), in.size() * sizeof(T)); } } + + static void applyOne(NativeType in, UInt32 in_scale, NativeType& out, Scale scale_arg) + { + scale_arg = in_scale - scale_arg; + if (scale_arg > 0) + { + auto scale = intExp10OfSize(scale_arg); + Op::compute(&in, scale, &out); + } + else + { + memcpy(&out, &in, sizeof(T)); + } + } }; +/// Select the appropriate processing algorithm depending on the scale. +inline void validateScale(Int64 scale64) +{ + if (scale64 > std::numeric_limits::max() || scale64 < std::numeric_limits::min()) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale argument for rounding function is too large"); +} -/** Select the appropriate processing algorithm depending on the scale. 
- */ +inline Scale getScaleArg(const ColumnConst* scale_col) +{ + const auto & scale_field = scale_col->getField(); + + Int64 scale64 = scale_field.get(); + validateScale(scale64); + + return scale64; +} + +/// Generic dispatcher template struct Dispatcher { @@ -457,30 +515,65 @@ struct Dispatcher FloatRoundingImpl, IntegerRoundingImpl>; - static ColumnPtr apply(const IColumn * col_general, Scale scale_arg) + template + static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr) { - const auto & col = checkAndGetColumn>(*col_general); + const auto & value_col_typed = checkAndGetColumn>(*value_col); auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_res = col_res->getData(); - vec_res.resize(col.getData().size()); + vec_res.resize(value_col_typed.getData().size()); if (!vec_res.empty()) { - if (scale_arg == 0) + if (scale_col == nullptr || isColumnConst(*scale_col)) { - size_t scale = 1; - FunctionRoundingImpl::apply(col.getData(), scale, vec_res); + auto scale_arg = (scale_col == nullptr) ? 
0 : getScaleArg(checkAndGetColumnConst>(scale_col)); + if (scale_arg == 0) + { + size_t scale = 1; + FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); + } + else if (scale_arg > 0) + { + size_t scale = intExp10(scale_arg); + FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); + } + else + { + size_t scale = intExp10(-scale_arg); + FunctionRoundingImpl::apply(value_col_typed.getData(), scale, vec_res); + } } - else if (scale_arg > 0) + /// Non-const scale argument: + else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) { - size_t scale = intExp10(scale_arg); - FunctionRoundingImpl::apply(col.getData(), scale, vec_res); - } - else - { - size_t scale = intExp10(-scale_arg); - FunctionRoundingImpl::apply(col.getData(), scale, vec_res); + const auto & value_data = value_col_typed.getData(); + const auto & scale_data = scale_col_typed->getData(); + const size_t rows = value_data.size(); + + for (size_t i = 0; i < rows; ++i) + { + Int64 scale64 = scale_data[i]; + validateScale(scale64); + Scale raw_scale = scale64; + + if (raw_scale == 0) + { + size_t scale = 1; + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + else if (raw_scale > 0) + { + size_t scale = intExp10(raw_scale); + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + else + { + size_t scale = intExp10(-raw_scale); + FunctionRoundingImpl::applyOne(value_data[i], scale, vec_res[i]); + } + } } } @@ -488,28 +581,51 @@ struct Dispatcher } }; +/// Dispatcher for Decimal inputs template struct Dispatcher { public: - static ColumnPtr apply(const IColumn * col_general, Scale scale_arg) + template + static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr) { - const auto & col = checkAndGetColumn>(*col_general); - const typename ColumnDecimal::Container & vec_src = col.getData(); + const auto & value_col_typed = checkAndGetColumn>(*value_col); + const typename ColumnDecimal::Container & 
vec_src = value_col_typed.getData(); - auto col_res = ColumnDecimal::create(vec_src.size(), col.getScale()); + auto col_res = ColumnDecimal::create(vec_src.size(), value_col_typed.getScale()); auto & vec_res = col_res->getData(); if (!vec_res.empty()) - DecimalRoundingImpl::apply(col.getData(), col.getScale(), vec_res, scale_arg); + { + if (scale_col == nullptr || isColumnConst(*scale_col)) + { + auto scale_arg = scale_col == nullptr ? 0 : getScaleArg(checkAndGetColumnConst>(scale_col)); + DecimalRoundingImpl::apply(value_col_typed.getData(), value_col_typed.getScale(), vec_res, scale_arg); + } + /// Non-const scale argument + else if (const auto * scale_col_typed = checkAndGetColumn>(scale_col)) + { + const auto & scale = scale_col_typed->getData(); + const size_t rows = vec_src.size(); + + for (size_t i = 0; i < rows; ++i) + { + Int64 scale64 = scale[i]; + validateScale(scale64); + Scale raw_scale = scale64; + + DecimalRoundingImpl::applyOne(value_col_typed.getElement(i), value_col_typed.getScale(), + reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); + } + } + } return col_res; } }; -/** A template for functions that round the value of an input parameter of type - * (U)Int8/16/32/64, Float32/64 or Decimal32/64/128, and accept an additional optional parameter (default is 0). - */ +/// Functions that round the value of an input parameter of type (U)Int8/16/32/64, Float32/64 or Decimal32/64/128. +/// Accept an additional optional parameter of type (U)Int8/16/32/64 (0 by default). 
template class FunctionRounding : public IFunction { @@ -517,75 +633,58 @@ public: static constexpr auto name = Name::name; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } - + String getName() const override { return name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - /// Get result types by argument types. If the function does not apply to these arguments, throw an exception. - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if ((arguments.empty()) || (arguments.size() > 2)) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2.", - getName(), arguments.size()); - - for (const auto & type : arguments) - if (!isNumber(type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", - arguments[0]->getName(), getName()); - - return arguments[0]; - } - - static Scale getScaleArg(const ColumnsWithTypeAndName & arguments) - { - if (arguments.size() == 2) - { - const IColumn & scale_column = *arguments[1].column; - if (!isColumnConst(scale_column)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument for rounding functions must be constant"); - - Field scale_field = assert_cast(scale_column).getField(); - if (scale_field.getType() != Field::Types::UInt64 - && scale_field.getType() != Field::Types::Int64) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument for rounding functions must have integer type"); - - Int64 scale64 = scale_field.get(); - if (scale64 > std::numeric_limits::max() - || scale64 < std::numeric_limits::min()) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale argument for rounding function 
is too large"); - - return scale64; - } - return 0; - } - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors mandatory_args{ + {"x", static_cast(&isNumber), nullptr, "A number to round"}, + }; + FunctionArgumentDescriptors optional_args{ + {"N", static_cast(&isNativeInteger), nullptr, "The number of decimal places to round to"}, + }; + validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + + return arguments[0].type; + } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { - const ColumnWithTypeAndName & column = arguments[0]; - Scale scale_arg = getScaleArg(arguments); + const ColumnWithTypeAndName & value_arg = arguments[0]; ColumnPtr res; - auto call = [&](const auto & types) -> bool + auto call_data = [&](const auto & types) -> bool { using Types = std::decay_t; - using DataType = typename Types::LeftType; + using DataType = typename Types::RightType; - if constexpr (IsDataTypeNumber || IsDataTypeDecimal) + if (arguments.size() > 1) { - using FieldType = typename DataType::FieldType; - res = Dispatcher::apply(column.column.get(), scale_arg); + const ColumnWithTypeAndName & scale_column = arguments[1]; + + auto call_scale = [&](const auto & scaleTypes) -> bool + { + using ScaleTypes = std::decay_t; + using ScaleType = typename ScaleTypes::RightType; + + if (isColumnConst(*value_arg.column) && !isColumnConst(*scale_column.column)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale column must be const for const data column"); + + res = Dispatcher::template apply(value_arg.column.get(), scale_column.column.get()); + return true; + }; + + TypeIndex right_index = scale_column.type->getTypeId(); + if (!callOnBasicType(right_index, 
call_scale)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument for rounding functions must have integer type"); return true; } - return false; + res = Dispatcher::template apply(value_arg.column.get()); + return true; }; #if !defined(__SSE4_1__) @@ -597,10 +696,9 @@ public: throw Exception(ErrorCodes::CANNOT_SET_ROUNDING_MODE, "Cannot set floating point rounding mode"); #endif - if (!callOnIndexAndDataType(column.type->getTypeId(), call)) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", column.name, getName()); - } + TypeIndex left_index = value_arg.type->getTypeId(); + if (!callOnBasicType(left_index, call_data)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", value_arg.name, getName()); return res; } @@ -617,9 +715,8 @@ public: }; -/** Rounds down to a number within explicitly specified array. - * If the value is less than the minimal bound - returns the minimal bound. - */ +/// Rounds down to a number within explicitly specified array. +/// If the value is less than the minimal bound - returns the minimal bound. 
class FunctionRoundDown : public IFunction { public: @@ -627,7 +724,6 @@ public: static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } - bool isVariadic() const override { return false; } size_t getNumberOfArguments() const override { return 2; } bool useDefaultImplementationForConstants() const override { return true; } diff --git a/src/Functions/FunctionsStringDistance.cpp b/src/Functions/FunctionsStringDistance.cpp index 6cb23bbea9f..48f4aaf4e09 100644 --- a/src/Functions/FunctionsStringDistance.cpp +++ b/src/Functions/FunctionsStringDistance.cpp @@ -113,6 +113,36 @@ struct ByteHammingDistanceImpl } }; +void parseUTF8String(const char * __restrict data, size_t size, std::function utf8_consumer, std::function ascii_consumer = nullptr) +{ + const char * end = data + size; + while (data < end) + { + size_t len = UTF8::seqLength(*data); + if (len == 1) + { + if (ascii_consumer) + ascii_consumer(static_cast(*data)); + else + utf8_consumer(static_cast(*data)); + ++data; + } + else + { + auto code_point = UTF8::convertUTF8ToCodePoint(data, end - data); + if (code_point.has_value()) + { + utf8_consumer(code_point.value()); + data += len; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal UTF-8 sequence, while processing '{}'", StringRef(data, end - data)); + } + } + } +} + template struct ByteJaccardIndexImpl { @@ -138,57 +168,28 @@ struct ByteJaccardIndexImpl haystack_set.fill(0); needle_set.fill(0); - while (haystack < haystack_end) + if constexpr (is_utf8) { - size_t len = 1; - if constexpr (is_utf8) - len = UTF8::seqLength(*haystack); - - if (len == 1) + parseUTF8String( + haystack, + haystack_size, + [&](UInt32 data) { haystack_utf8_set.insert(data); }, + [&](unsigned char data) { haystack_set[data] = 1; }); + parseUTF8String( + needle, needle_size, [&](UInt32 data) { needle_utf8_set.insert(data); }, [&](unsigned char data) { needle_set[data] = 1; }); + } + else + { + while (haystack 
< haystack_end) { haystack_set[static_cast(*haystack)] = 1; ++haystack; } - else - { - auto code_point = UTF8::convertUTF8ToCodePoint(haystack, haystack_end - haystack); - if (code_point.has_value()) - { - haystack_utf8_set.insert(code_point.value()); - haystack += len; - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal UTF-8 sequence, while processing '{}'", StringRef(haystack, haystack_end - haystack)); - } - } - } - - while (needle < needle_end) - { - - size_t len = 1; - if constexpr (is_utf8) - len = UTF8::seqLength(*needle); - - if (len == 1) + while (needle < needle_end) { needle_set[static_cast(*needle)] = 1; ++needle; } - else - { - auto code_point = UTF8::convertUTF8ToCodePoint(needle, needle_end - needle); - if (code_point.has_value()) - { - needle_utf8_set.insert(code_point.value()); - needle += len; - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal UTF-8 sequence, while processing '{}'", StringRef(needle, needle_end - needle)); - } - } } UInt8 intersection = 0; @@ -226,6 +227,7 @@ struct ByteJaccardIndexImpl static constexpr size_t max_string_size = 1u << 16; +template struct ByteEditDistanceImpl { using ResultType = UInt64; @@ -242,6 +244,16 @@ struct ByteEditDistanceImpl ErrorCodes::TOO_LARGE_STRING_SIZE, "The string size is too big for function editDistance, should be at most {}", max_string_size); + PaddedPODArray haystack_utf8; + PaddedPODArray needle_utf8; + if constexpr (is_utf8) + { + parseUTF8String(haystack, haystack_size, [&](UInt32 data) { haystack_utf8.push_back(data); }); + parseUTF8String(needle, needle_size, [&](UInt32 data) { needle_utf8.push_back(data); }); + haystack_size = haystack_utf8.size(); + needle_size = needle_utf8.size(); + } + PaddedPODArray distances0(haystack_size + 1, 0); PaddedPODArray distances1(haystack_size + 1, 0); @@ -261,9 +273,16 @@ struct ByteEditDistanceImpl insertion = distances1[pos_haystack] + 1; substitution = distances0[pos_haystack]; - if (*(needle + pos_needle) != 
*(haystack + pos_haystack)) - substitution += 1; - + if constexpr (is_utf8) + { + if (needle_utf8[pos_needle] != haystack_utf8[pos_haystack]) + substitution += 1; + } + else + { + if (*(needle + pos_needle) != *(haystack + pos_haystack)) + substitution += 1; + } distances1[pos_haystack + 1] = std::min(deletion, std::min(substitution, insertion)); } distances0.swap(distances1); @@ -457,7 +476,12 @@ struct NameEditDistance { static constexpr auto name = "editDistance"; }; -using FunctionEditDistance = FunctionsStringSimilarity, NameEditDistance>; +using FunctionEditDistance = FunctionsStringSimilarity>, NameEditDistance>; +struct NameEditDistanceUTF8 +{ + static constexpr auto name = "editDistanceUTF8"; +}; +using FunctionEditDistanceUTF8 = FunctionsStringSimilarity>, NameEditDistanceUTF8>; struct NameDamerauLevenshteinDistance { @@ -499,6 +523,10 @@ REGISTER_FUNCTION(StringDistance) FunctionDocumentation{.description = R"(Calculates the edit distance between two byte-strings.)"}); factory.registerAlias("levenshteinDistance", NameEditDistance::name); + factory.registerFunction( + FunctionDocumentation{.description = R"(Calculates the edit distance between two UTF8 strings.)"}); + factory.registerAlias("levenshteinDistanceUTF8", NameEditDistanceUTF8::name); + factory.registerFunction( FunctionDocumentation{.description = R"(Calculates the Damerau-Levenshtein distance two between two byte-string.)"}); diff --git a/src/Functions/LeastGreatestGeneric.h b/src/Functions/LeastGreatestGeneric.h index 9073f14d679..bbab001b00d 100644 --- a/src/Functions/LeastGreatestGeneric.h +++ b/src/Functions/LeastGreatestGeneric.h @@ -111,7 +111,7 @@ public: argument_types.push_back(argument.type); /// More efficient specialization for two numeric arguments. 
- if (arguments.size() == 2 && isNumber(arguments[0].type) && isNumber(arguments[1].type)) + if (arguments.size() == 2 && isNumber(removeNullable(arguments[0].type)) && isNumber(removeNullable(arguments[1].type))) return std::make_unique(SpecializedFunction::create(context), argument_types, return_type); return std::make_unique( @@ -123,7 +123,7 @@ public: if (types.empty()) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} cannot be called without arguments", getName()); - if (types.size() == 2 && isNumber(types[0]) && isNumber(types[1])) + if (types.size() == 2 && isNumber(removeNullable(types[0])) && isNumber(removeNullable(types[1]))) return SpecializedFunction::create(context)->getReturnTypeImpl(types); return getLeastSupertype(types); diff --git a/src/Functions/array/arrayAggregation.cpp b/src/Functions/array/arrayAggregation.cpp index 03aa5fb9086..adb1bb707d8 100644 --- a/src/Functions/array/arrayAggregation.cpp +++ b/src/Functions/array/arrayAggregation.cpp @@ -1,5 +1,7 @@ #include +#include +#include #include #include #include @@ -102,6 +104,11 @@ struct ArrayAggregateImpl static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypePtr & /*array_element*/) { + if (aggregate_operation == AggregateOperation::max || aggregate_operation == AggregateOperation::min) + { + return expression_return; + } + DataTypePtr result; auto call = [&](const auto & types) @@ -133,31 +140,6 @@ struct ArrayAggregateImpl return true; } } - else if constexpr (aggregate_operation == AggregateOperation::max || aggregate_operation == AggregateOperation::min) - { - if constexpr (IsDataTypeDate) - { - result = std::make_shared(); - - return true; - } - else if constexpr (!IsDataTypeDecimal) - { - std::string timezone = getDateTimeTimezone(*expression_return); - result = std::make_shared(timezone); - - return true; - } - else - { - std::string timezone = getDateTimeTimezone(*expression_return); - UInt32 scale = 
getDecimalScale(*expression_return); - result = std::make_shared(scale, timezone); - - return true; - } - } - return false; }; @@ -378,6 +360,47 @@ struct ArrayAggregateImpl static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped) { + if constexpr (aggregate_operation == AggregateOperation::max || aggregate_operation == AggregateOperation::min) + { + MutableColumnPtr res; + const auto & column = array.getDataPtr(); + const ColumnConst * const_column = checkAndGetColumn(&*column); + if (const_column) + { + res = const_column->getDataColumn().cloneEmpty(); + } + else + { + res = column->cloneEmpty(); + } + const IColumn::Offsets & offsets = array.getOffsets(); + size_t pos = 0; + for (const auto & offset : offsets) + { + if (offset == pos) + { + res->insertDefault(); + continue; + } + size_t current_max_or_min_index = pos; + ++pos; + for (; pos < offset; ++pos) + { + int compare_result = column->compareAt(pos, current_max_or_min_index, *column, 1); + if (aggregate_operation == AggregateOperation::max && compare_result > 0) + { + current_max_or_min_index = pos; + } + else if (aggregate_operation == AggregateOperation::min && compare_result < 0) + { + current_max_or_min_index = pos; + } + } + res->insert((*column)[current_max_or_min_index]); + } + return res; + } + const IColumn::Offsets & offsets = array.getOffsets(); ColumnPtr res; diff --git a/src/Functions/base64Decode.cpp b/src/Functions/base64Decode.cpp index 5f7a3406c62..50278c4b0b2 100644 --- a/src/Functions/base64Decode.cpp +++ b/src/Functions/base64Decode.cpp @@ -7,7 +7,14 @@ namespace DB { REGISTER_FUNCTION(Base64Decode) { - factory.registerFunction>(); + FunctionDocumentation::Description description = R"(Accepts a String and decodes it from base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Throws an exception in case of an error. 
Alias: FROM_BASE64.)"; + FunctionDocumentation::Syntax syntax = "base64Decode(encoded)"; + FunctionDocumentation::Arguments arguments = {{"encoded", "String column or constant. If the string is not a valid Base64-encoded value, an exception is thrown."}}; + FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument."; + FunctionDocumentation::Examples examples = {{"Example", "SELECT base64Decode('Y2xpY2tob3VzZQ==')", "clickhouse"}}; + FunctionDocumentation::Categories categories = {"String encoding"}; + + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); /// MySQL compatibility alias. factory.registerAlias("FROM_BASE64", "base64Decode", FunctionFactory::CaseInsensitive); diff --git a/src/Functions/base64Encode.cpp b/src/Functions/base64Encode.cpp index 69268f5a25d..d6e63c38a4c 100644 --- a/src/Functions/base64Encode.cpp +++ b/src/Functions/base64Encode.cpp @@ -7,7 +7,14 @@ namespace DB { REGISTER_FUNCTION(Base64Encode) { - factory.registerFunction>(); + FunctionDocumentation::Description description = R"(Encodes a String as base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Alias: TO_BASE64.)"; + FunctionDocumentation::Syntax syntax = "base64Encode(plaintext)"; + FunctionDocumentation::Arguments arguments = {{"plaintext", "String column or constant."}}; + FunctionDocumentation::ReturnedValue returned_value = "A string containing the encoded value of the argument."; + FunctionDocumentation::Examples examples = {{"Example", "SELECT base64Encode('clickhouse')", "Y2xpY2tob3VzZQ=="}}; + FunctionDocumentation::Categories categories = {"String encoding"}; + + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); /// MySQL compatibility alias. 
factory.registerAlias("TO_BASE64", "base64Encode", FunctionFactory::CaseInsensitive); diff --git a/src/Functions/base64URLDecode.cpp b/src/Functions/base64URLDecode.cpp new file mode 100644 index 00000000000..f5766dc60bd --- /dev/null +++ b/src/Functions/base64URLDecode.cpp @@ -0,0 +1,21 @@ +#include + +#if USE_BASE64 +#include + +namespace DB +{ +REGISTER_FUNCTION(Base64URLDecode) +{ + FunctionDocumentation::Description description = R"(Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; + FunctionDocumentation::Syntax syntax = "base64URLDecode(encodedURL)"; + FunctionDocumentation::Arguments arguments = {{"encodedURL", "String column or constant. If the string is not a valid Base64-encoded value, an exception is thrown."}}; + FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument."; + FunctionDocumentation::Examples examples = {{"Example", "SELECT base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t')", "https://clickhouse.com"}}; + FunctionDocumentation::Categories categories = {"String encoding"}; + + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); +} +} + +#endif diff --git a/src/Functions/base64URLEncode.cpp b/src/Functions/base64URLEncode.cpp new file mode 100644 index 00000000000..73a465a30c5 --- /dev/null +++ b/src/Functions/base64URLEncode.cpp @@ -0,0 +1,21 @@ +#include + +#if USE_BASE64 +#include + +namespace DB +{ +REGISTER_FUNCTION(Base64URLEncode) +{ + FunctionDocumentation::Description description = R"(Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; + FunctionDocumentation::Syntax syntax = "base64URLEncode(url)"; + FunctionDocumentation::Arguments arguments = {{"url", "String column or constant."}}; + 
FunctionDocumentation::ReturnedValue returned_value = "A string containing the encoded value of the argument."; + FunctionDocumentation::Examples examples = {{"Example", "SELECT base64URLEncode('https://clickhouse.com')", "aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ"}}; + FunctionDocumentation::Categories categories = {"String encoding"}; + + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); +} +} + +#endif diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index 78ec9c8b773..cb6b83c1cf1 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; + extern const int PARAMETER_OUT_OF_BOUND; } namespace @@ -21,12 +22,21 @@ struct BitTestImpl static const constexpr bool allow_string_integer = false; template - NO_SANITIZE_UNDEFINED static Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument"); else - return (typename NumberTraits::ToInteger::Type(a) >> typename NumberTraits::ToInteger::Type(b)) & 1; + { + typename NumberTraits::ToInteger::Type a_int = a; + typename NumberTraits::ToInteger::Type b_int = b; + const auto max_position = static_cast((8 * sizeof(a)) - 1); + if (b_int > max_position || b_int < 0) + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "The bit position argument needs to a positive value and less or equal to {} for integer {}", + std::to_string(max_position), std::to_string(a_int)); + return (a_int >> b_int) & 1; + } } #if USE_EMBEDDED_COMPILER diff --git a/src/Functions/concat.cpp b/src/Functions/concat.cpp index 68cfcdb8d90..b011c33e02a 100644 --- a/src/Functions/concat.cpp +++ b/src/Functions/concat.cpp @@ -46,25 +46,30 @@ public: DataTypePtr getReturnTypeImpl(const 
DataTypes & arguments) const override { - if (arguments.size() < 2) - throw Exception( - ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, - "Number of arguments for function {} doesn't match: passed {}, should be at least 2", - getName(), - arguments.size()); + if (arguments.size() == 1) + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Number of arguments for function {} should not be 1", getName()); return std::make_shared(); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (arguments.empty()) + { + auto res_data = ColumnString::create(); + res_data->insertDefault(); + return ColumnConst::create(std::move(res_data), input_rows_count); + } + else if (arguments.size() == 1) + return arguments[0].column; /// Format function is not proven to be faster for two arguments. /// Actually there is overhead of 2 to 5 extra instructions for each string for checking empty strings in FormatImpl. /// Though, benchmarks are really close, for most examples we saw executeBinary is slightly faster (0-3%). /// For 3 and more arguments FormatStringImpl is much faster (up to 50-60%). 
- if (arguments.size() == 2) + else if (arguments.size() == 2) return executeBinary(arguments, input_rows_count); - return executeFormatImpl(arguments, input_rows_count); + else + return executeFormatImpl(arguments, input_rows_count); } private: @@ -209,11 +214,11 @@ public: { if (arguments.size() == 1) return FunctionFactory::instance().getImpl("toString", context)->build(arguments); - if (std::ranges::all_of(arguments, [](const auto & elem) { return isArray(elem.type); })) + if (!arguments.empty() && std::ranges::all_of(arguments, [](const auto & elem) { return isArray(elem.type); })) return FunctionFactory::instance().getImpl("arrayConcat", context)->build(arguments); - if (std::ranges::all_of(arguments, [](const auto & elem) { return isMap(elem.type); })) + if (!arguments.empty() && std::ranges::all_of(arguments, [](const auto & elem) { return isMap(elem.type); })) return FunctionFactory::instance().getImpl("mapConcat", context)->build(arguments); - if (std::ranges::all_of(arguments, [](const auto & elem) { return isTuple(elem.type); })) + if (!arguments.empty() && std::ranges::all_of(arguments, [](const auto & elem) { return isTuple(elem.type); })) return FunctionFactory::instance().getImpl("tupleConcat", context)->build(arguments); return std::make_unique( FunctionConcat::create(context), @@ -221,15 +226,8 @@ public: return_type); } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const DataTypes &) const override { - if (arguments.empty()) - throw Exception( - ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, - "Number of arguments for function {} doesn't match: passed {}, should be at least 1.", - getName(), - arguments.size()); - /// We always return Strings from concat, even if arguments were fixed strings. 
return std::make_shared(); } diff --git a/src/Functions/dateTimeToSnowflakeID.cpp b/src/Functions/dateTimeToSnowflakeID.cpp new file mode 100644 index 00000000000..968a7628ca5 --- /dev/null +++ b/src/Functions/dateTimeToSnowflakeID.cpp @@ -0,0 +1,158 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace +{ + +/// See generateSnowflakeID.cpp +constexpr size_t time_shift = 22; + +} + +class FunctionDateTimeToSnowflakeID : public IFunction +{ +public: + static constexpr auto name = "dateTimeToSnowflakeID"; + + static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared(); } + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {"value", static_cast(&isDateTime), nullptr, "DateTime"} + }; + FunctionArgumentDescriptors optional_args{ + {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} + }; + validateFunctionArgumentTypes(*this, arguments, args, optional_args); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & col_src = *arguments[0].column; + + UInt64 epoch = 0; + if (arguments.size() == 2 && input_rows_count != 0) + { + const auto & col_epoch = *arguments[1].column; + epoch = col_epoch.getUInt(0); + } + + auto col_res = ColumnUInt64::create(input_rows_count); + auto & res_data = col_res->getData(); + + const auto & src_data = typeid_cast(col_src).getData(); + for (size_t 
i = 0; i < input_rows_count; ++i) + res_data[i] = (static_cast(src_data[i]) * 1000 - epoch) << time_shift; + return col_res; + } +}; + + +class FunctionDateTime64ToSnowflakeID : public IFunction +{ +public: + static constexpr auto name = "dateTime64ToSnowflakeID"; + + static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared(); } + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} + }; + FunctionArgumentDescriptors optional_args{ + {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} + }; + validateFunctionArgumentTypes(*this, arguments, args, optional_args); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & col_src = *arguments[0].column; + const auto & src_data = typeid_cast(col_src).getData(); + + UInt64 epoch = 0; + if (arguments.size() == 2 && input_rows_count != 0) + { + const auto & col_epoch = *arguments[1].column; + epoch = col_epoch.getUInt(0); + } + + auto col_res = ColumnUInt64::create(input_rows_count); + auto & res_data = col_res->getData(); + + /// timestamps in snowflake-ids are millisecond-based, convert input to milliseconds + UInt32 src_scale = getDecimalScale(*arguments[0].type); + Int64 multiplier_msec = DecimalUtils::scaleMultiplier(3); + Int64 multiplier_src = DecimalUtils::scaleMultiplier(src_scale); + auto factor = multiplier_msec / static_cast(multiplier_src); + + for 
(size_t i = 0; i < input_rows_count; ++i) + res_data[i] = std::llround(src_data[i] * factor - epoch) << time_shift; + + return col_res; + } +}; + +REGISTER_FUNCTION(DateTimeToSnowflakeID) +{ + { + FunctionDocumentation::Description description = R"(Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time.)"; + FunctionDocumentation::Syntax syntax = "dateTimeToSnowflakeID(value[, epoch])"; + FunctionDocumentation::Arguments arguments = { + {"value", "Date with time. [DateTime](../data-types/datetime.md)."}, + {"epoch", "Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md)"} + }; + FunctionDocumentation::ReturnedValue returned_value = "Input value converted to [UInt64](../data-types/int-uint.md) as the first Snowflake ID at that time."; + FunctionDocumentation::Examples examples = {{"simple", "SELECT dateTimeToSnowflakeID(toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai'))", "6832626392367104000"}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + } + + { + FunctionDocumentation::Description description = R"(Converts a [DateTime64](../data-types/datetime64.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time.)"; + FunctionDocumentation::Syntax syntax = "dateTime64ToSnowflakeID(value[, epoch])"; + FunctionDocumentation::Arguments arguments = { + {"value", "Date with time. [DateTime64](../data-types/datetime.md)."}, + {"epoch", "Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. 
[UInt*](../data-types/int-uint.md)"} + }; + FunctionDocumentation::ReturnedValue returned_value = "Input value converted to [UInt64](../data-types/int-uint.md) as the first Snowflake ID at that time."; + FunctionDocumentation::Examples examples = {{"simple", "SELECT dateTime64ToSnowflakeID(toDateTime64('2021-08-15 18:57:56', 3, 'Asia/Shanghai'))", "6832626394434895872"}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + } +} + +} diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index f1e47ea1158..8ac010deafc 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -207,7 +207,7 @@ public: REGISTER_FUNCTION(GenerateSnowflakeID) { - FunctionDocumentation::Description description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; + FunctionDocumentation::Description description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 + 1 top zero bits), followed by a machine id (10 bits), and a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. 
In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; FunctionDocumentation::Syntax syntax = "generateSnowflakeID([expression])"; FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}}; FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64"; diff --git a/src/Functions/geometryConverters.h b/src/Functions/geometryConverters.h index 97162fa9dd0..03831d37e0c 100644 --- a/src/Functions/geometryConverters.h +++ b/src/Functions/geometryConverters.h @@ -28,6 +28,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +template +using LineString = boost::geometry::model::linestring; + template using Ring = boost::geometry::model::ring; @@ -38,11 +41,13 @@ template using MultiPolygon = boost::geometry::model::multi_polygon>; using CartesianPoint = boost::geometry::model::d2::point_xy; +using CartesianLineString = LineString; using CartesianRing = Ring; using CartesianPolygon = Polygon; using CartesianMultiPolygon = MultiPolygon; using SphericalPoint = boost::geometry::model::point>; +using SphericalLineString = LineString; using SphericalRing = Ring; using SphericalPolygon = Polygon; using SphericalMultiPolygon = MultiPolygon; @@ -85,6 +90,29 @@ struct ColumnToPointsConverter } }; + +/** + * Class which converts Column with type Array(Tuple(Float64, Float64)) to a vector of boost linestring type. 
+*/ +template +struct ColumnToLineStringsConverter +{ + static std::vector> convert(ColumnPtr col) + { + const IColumn::Offsets & offsets = typeid_cast(*col).getOffsets(); + size_t prev_offset = 0; + std::vector> answer; + answer.reserve(offsets.size()); + auto tmp = ColumnToPointsConverter::convert(typeid_cast(*col).getDataPtr()); + for (size_t offset : offsets) + { + answer.emplace_back(tmp.begin() + prev_offset, tmp.begin() + offset); + prev_offset = offset; + } + return answer; + } +}; + /** * Class which converts Column with type Array(Tuple(Float64, Float64)) to a vector of boost ring type. */ @@ -208,6 +236,39 @@ private: ColumnFloat64::Container & second_container; }; +/// Serialize Point, LineString as LineString +template +class LineStringSerializer +{ +public: + LineStringSerializer() + : offsets(ColumnUInt64::create()) + {} + + explicit LineStringSerializer(size_t n) + : offsets(ColumnUInt64::create(n)) + {} + + void add(const LineString & ring) + { + size += ring.size(); + offsets->insertValue(size); + for (const auto & point : ring) + point_serializer.add(point); + } + + ColumnPtr finalize() + { + return ColumnArray::create(point_serializer.finalize(), std::move(offsets)); + } + +private: + size_t size = 0; + PointSerializer point_serializer; + ColumnUInt64::MutablePtr offsets; +}; + +/// Almost the same as LineStringSerializer /// Serialize Point, Ring as Ring template class RingSerializer @@ -344,8 +405,16 @@ static void callOnGeometryDataType(DataTypePtr type, F && f) /// There is no Point type, because for most of geometry functions it is useless. if (factory.get("Point")->equals(*type)) return f(ConverterType>()); + + /// We should take the name into consideration to avoid ambiguity. + /// Because for example both Ring and LineString are resolved to Array(Tuple(Point)). 
+ else if (factory.get("LineString")->equals(*type) && type->getCustomName() && type->getCustomName()->getName() == "LineString") + return f(ConverterType>()); + + /// For backward compatibility if we call this function not on a custom type, we will consider Array(Tuple(Point)) as type Ring. else if (factory.get("Ring")->equals(*type)) return f(ConverterType>()); + else if (factory.get("Polygon")->equals(*type)) return f(ConverterType>()); else if (factory.get("MultiPolygon")->equals(*type)) diff --git a/src/Functions/isNotNull.cpp b/src/Functions/isNotNull.cpp index ea95a5c2b1c..a10e7ebd40c 100644 --- a/src/Functions/isNotNull.cpp +++ b/src/Functions/isNotNull.cpp @@ -29,6 +29,18 @@ public: return name; } + ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override + { + const ColumnWithTypeAndName & elem = arguments[0]; + if (elem.type->onlyNull()) + return result_type->createColumnConst(1, UInt8(0)); + + if (canContainNull(*elem.type)) + return nullptr; + + return result_type->createColumnConst(1, UInt8(1)); + } + size_t getNumberOfArguments() const override { return 1; } bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } diff --git a/src/Functions/isNull.cpp b/src/Functions/isNull.cpp index a98ff2ab8e8..95d659b103b 100644 --- a/src/Functions/isNull.cpp +++ b/src/Functions/isNull.cpp @@ -31,6 +31,18 @@ public: return name; } + ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override + { + const ColumnWithTypeAndName & elem = arguments[0]; + if (elem.type->onlyNull()) + return result_type->createColumnConst(1, UInt8(1)); + + if (canContainNull(*elem.type)) + return nullptr; + + return result_type->createColumnConst(1, UInt8(0)); + } + size_t getNumberOfArguments() const override { return 1; } bool 
useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } diff --git a/src/Functions/isNullable.cpp b/src/Functions/isNullable.cpp index 14874487f40..b24ee4f5e73 100644 --- a/src/Functions/isNullable.cpp +++ b/src/Functions/isNullable.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB { @@ -23,6 +24,15 @@ public: return name; } + ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override + { + const ColumnWithTypeAndName & elem = arguments[0]; + if (elem.type->onlyNull() || canContainNull(*elem.type)) + return result_type->createColumnConst(1, UInt8(1)); + + return result_type->createColumnConst(1, UInt8(0)); + } + bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForNothing() const override { return false; } diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 11e210d2cc2..162b8c58873 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -978,8 +978,7 @@ namespace [[nodiscard]] static PosOrError mysqlAmericanDate(Pos cur, Pos end, const String & fragment, DateTime & date) { - if (auto status = checkSpace(cur, end, 8, "mysqlAmericanDate requires size >= 8", fragment)) - return tl::unexpected(status.error()); + RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 8, "mysqlAmericanDate requires size >= 8", fragment)) Int32 month; ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumber2(cur, end, fragment, month))) @@ -993,7 +992,7 @@ namespace Int32 year; ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumber2(cur, end, fragment, year))) - RETURN_ERROR_IF_FAILED(date.setYear(year)) + RETURN_ERROR_IF_FAILED(date.setYear(year + 2000)) return cur; } @@ -1015,8 +1014,7 @@ namespace [[nodiscard]] static PosOrError mysqlISO8601Date(Pos cur, Pos end, const String & fragment, DateTime & date) 
{ - if (auto status = checkSpace(cur, end, 10, "mysqlISO8601Date requires size >= 10", fragment)) - return tl::unexpected(status.error()); + RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 10, "mysqlISO8601Date requires size >= 10", fragment)) Int32 year; Int32 month; @@ -1462,8 +1460,7 @@ namespace [[nodiscard]] static PosOrError jodaDayOfWeekText(size_t /*min_represent_digits*/, Pos cur, Pos end, const String & fragment, DateTime & date) { - if (auto result= checkSpace(cur, end, 3, "jodaDayOfWeekText requires size >= 3", fragment); !result.has_value()) - return tl::unexpected(result.error()); + RETURN_ERROR_IF_FAILED(checkSpace(cur, end, 3, "jodaDayOfWeekText requires size >= 3", fragment)) String text1(cur, 3); boost::to_lower(text1); @@ -1556,8 +1553,8 @@ namespace Int32 day_of_month; ASSIGN_RESULT_OR_RETURN_ERROR(cur, (readNumberWithVariableLength( cur, end, false, false, false, repetitions, std::max(repetitions, 2uz), fragment, day_of_month))) - if (auto res = date.setDayOfMonth(day_of_month); !res.has_value()) - return tl::unexpected(res.error()); + RETURN_ERROR_IF_FAILED(date.setDayOfMonth(day_of_month)) + return cur; } diff --git a/src/Functions/polygonsIntersection.cpp b/src/Functions/polygonsIntersection.cpp index 77484e7e63c..329242e762e 100644 --- a/src/Functions/polygonsIntersection.cpp +++ b/src/Functions/polygonsIntersection.cpp @@ -73,6 +73,8 @@ public: if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); + else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsSymDifference.cpp b/src/Functions/polygonsSymDifference.cpp index 
194b7f2cfd7..3c219d0facb 100644 --- a/src/Functions/polygonsSymDifference.cpp +++ b/src/Functions/polygonsSymDifference.cpp @@ -71,6 +71,8 @@ public: if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); + else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsUnion.cpp b/src/Functions/polygonsUnion.cpp index 37d865af50a..969eb2f78fb 100644 --- a/src/Functions/polygonsUnion.cpp +++ b/src/Functions/polygonsUnion.cpp @@ -71,6 +71,8 @@ public: if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); + else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsWithin.cpp b/src/Functions/polygonsWithin.cpp index 35a9e17cdfd..c63ad5ef868 100644 --- a/src/Functions/polygonsWithin.cpp +++ b/src/Functions/polygonsWithin.cpp @@ -75,6 +75,8 @@ public: if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); + else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName()); 
else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/readWkt.cpp b/src/Functions/readWkt.cpp index ddc847b1ca5..eb262777b0d 100644 --- a/src/Functions/readWkt.cpp +++ b/src/Functions/readWkt.cpp @@ -82,6 +82,11 @@ struct ReadWKTPointNameHolder static constexpr const char * name = "readWKTPoint"; }; +struct ReadWKTLineStringNameHolder +{ + static constexpr const char * name = "readWKTLineString"; +}; + struct ReadWKTRingNameHolder { static constexpr const char * name = "readWKTRing"; @@ -102,6 +107,30 @@ struct ReadWKTMultiPolygonNameHolder REGISTER_FUNCTION(ReadWKT) { factory.registerFunction, ReadWKTPointNameHolder>>(); + factory.registerFunction, ReadWKTLineStringNameHolder>>(FunctionDocumentation + { + .description=R"( +Parses a Well-Known Text (WKT) representation of a LineString geometry and returns it in the internal ClickHouse format. +)", + .syntax = "readWKTLineString(wkt_string)", + .arguments{ + {"wkt_string", "The input WKT string representing a LineString geometry."} + }, + .returned_value = "The function returns a ClickHouse internal representation of the linestring geometry.", + .examples{ + {"first call", "SELECT readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)');", R"( +┌─readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)')─┐ +│ [(1,1),(2,2),(3,3),(1,1)] │ +└──────────────────────────────────────────────────────┘ + )"}, + {"second call", "SELECT toTypeName(readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)'));", R"( +┌─toTypeName(readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)'))─┐ +│ LineString │ +└──────────────────────────────────────────────────────────────────┘ + )"}, + }, + .categories{"Unique identifiers"} + }); factory.registerFunction, ReadWKTRingNameHolder>>(); factory.registerFunction, ReadWKTPolygonNameHolder>>(); factory.registerFunction, ReadWKTMultiPolygonNameHolder>>(); diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp index 
4a2d502a31a..5ff8a636058 100644 --- a/src/Functions/snowflake.cpp +++ b/src/Functions/snowflake.cpp @@ -11,11 +11,17 @@ #include +/// ------------------------------------------------------------------------------------------------------------------------------ +/// The functions in this file are deprecated and should be removed in favor of functions 'snowflakeIDToDateTime[64]' and +/// 'dateTime[64]ToSnowflakeID' by summer 2025. Please also mark setting `allow_deprecated_snowflake_conversion_functions` as obsolete then. +/// ------------------------------------------------------------------------------------------------------------------------------ + namespace DB { namespace ErrorCodes { + extern const int DEPRECATED_FUNCTION; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -34,10 +40,19 @@ constexpr int time_shift = 22; class FunctionDateTimeToSnowflake : public IFunction { private: - const char * name; + const bool allow_deprecated_snowflake_conversion_functions; public: - explicit FunctionDateTimeToSnowflake(const char * name_) : name(name_) { } + static constexpr auto name = "dateTimeToSnowflake"; + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + + explicit FunctionDateTimeToSnowflake(ContextPtr context) + : allow_deprecated_snowflake_conversion_functions(context->getSettingsRef().allow_deprecated_snowflake_conversion_functions) + {} String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } @@ -56,6 +71,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (!allow_deprecated_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it set setting 'allow_deprecated_snowflake_conversion_functions' to 'true'", getName()); + const auto & src = arguments[0]; const auto & src_column = *src.column; @@ -73,13 +91,20 @@ 
public: class FunctionSnowflakeToDateTime : public IFunction { private: - const char * name; const bool allow_nonconst_timezone_arguments; + const bool allow_deprecated_snowflake_conversion_functions; public: - explicit FunctionSnowflakeToDateTime(const char * name_, ContextPtr context) - : name(name_) - , allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + static constexpr auto name = "snowflakeToDateTime"; + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + + explicit FunctionSnowflakeToDateTime(ContextPtr context) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) + , allow_deprecated_snowflake_conversion_functions(context->getSettingsRef().allow_deprecated_snowflake_conversion_functions) {} String getName() const override { return name; } @@ -107,6 +132,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (!allow_deprecated_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it set setting 'allow_deprecated_snowflake_conversion_functions' to 'true'", getName()); + const auto & src = arguments[0]; const auto & src_column = *src.column; @@ -138,10 +166,19 @@ public: class FunctionDateTime64ToSnowflake : public IFunction { private: - const char * name; + const bool allow_deprecated_snowflake_conversion_functions; public: - explicit FunctionDateTime64ToSnowflake(const char * name_) : name(name_) { } + static constexpr auto name = "dateTime64ToSnowflake"; + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + + explicit FunctionDateTime64ToSnowflake(ContextPtr context) + : allow_deprecated_snowflake_conversion_functions(context->getSettingsRef().allow_deprecated_snowflake_conversion_functions) + {} String getName() const override { 
return name; } size_t getNumberOfArguments() const override { return 1; } @@ -160,6 +197,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (!allow_deprecated_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it set setting 'allow_deprecated_snowflake_conversion_functions' to true", getName()); + const auto & src = arguments[0]; const auto & src_column = *src.column; @@ -185,13 +225,20 @@ public: class FunctionSnowflakeToDateTime64 : public IFunction { private: - const char * name; const bool allow_nonconst_timezone_arguments; + const bool allow_deprecated_snowflake_conversion_functions; public: - explicit FunctionSnowflakeToDateTime64(const char * name_, ContextPtr context) - : name(name_) - , allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + static constexpr auto name = "snowflakeToDateTime64"; + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + + explicit FunctionSnowflakeToDateTime64(ContextPtr context) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) + , allow_deprecated_snowflake_conversion_functions(context->getSettingsRef().allow_deprecated_snowflake_conversion_functions) {} String getName() const override { return name; } @@ -219,6 +266,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (!allow_deprecated_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it set setting 'allow_deprecated_snowflake_conversion_functions' to true", getName()); + const auto & src = arguments[0]; const auto & src_column = *src.column; @@ -246,27 +296,12 @@ public: } -REGISTER_FUNCTION(DateTimeToSnowflake) 
+REGISTER_FUNCTION(LegacySnowflakeConversion) { - factory.registerFunction("dateTimeToSnowflake", - [](ContextPtr){ return std::make_shared("dateTimeToSnowflake"); }); -} - -REGISTER_FUNCTION(DateTime64ToSnowflake) -{ - factory.registerFunction("dateTime64ToSnowflake", - [](ContextPtr){ return std::make_shared("dateTime64ToSnowflake"); }); -} - -REGISTER_FUNCTION(SnowflakeToDateTime) -{ - factory.registerFunction("snowflakeToDateTime", - [](ContextPtr context){ return std::make_shared("snowflakeToDateTime", context); }); -} -REGISTER_FUNCTION(SnowflakeToDateTime64) -{ - factory.registerFunction("snowflakeToDateTime64", - [](ContextPtr context){ return std::make_shared("snowflakeToDateTime64", context); }); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/snowflakeIDToDateTime.cpp b/src/Functions/snowflakeIDToDateTime.cpp new file mode 100644 index 00000000000..b799792a56f --- /dev/null +++ b/src/Functions/snowflakeIDToDateTime.cpp @@ -0,0 +1,206 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +/// See generateSnowflakeID.cpp +constexpr size_t time_shift = 22; + +} + +class FunctionSnowflakeIDToDateTime : public IFunction +{ +private: + const bool allow_nonconst_timezone_arguments; + +public: + static constexpr auto name = "snowflakeIDToDateTime"; + + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionSnowflakeIDToDateTime(ContextPtr context) + : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool 
useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {"value", static_cast(&isUInt64), nullptr, "UInt64"} + }; + FunctionArgumentDescriptors optional_args{ + {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, + {"time_zone", static_cast(&isString), nullptr, "String"} + }; + validateFunctionArgumentTypes(*this, arguments, args, optional_args); + + String timezone; + if (arguments.size() == 3) + timezone = extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, allow_nonconst_timezone_arguments); + + return std::make_shared(timezone); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & col_src = *arguments[0].column; + + UInt64 epoch = 0; + if (arguments.size() >= 2 && input_rows_count != 0) + { + const auto & col_epoch = *arguments[1].column; + epoch = col_epoch.getUInt(0); + } + + auto col_res = ColumnDateTime::create(input_rows_count); + auto & res_data = col_res->getData(); + + if (const auto * col_src_non_const = typeid_cast(&col_src)) + { + const auto & src_data = col_src_non_const->getData(); + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = static_cast(((src_data[i] >> time_shift) + epoch) / 1000); + } + else if (const auto * col_src_const = typeid_cast(&col_src)) + { + UInt64 src_val = col_src_const->getValue(); + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = static_cast(((src_val >> time_shift) + epoch) / 1000); + } + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument for function {}", name); + + return col_res; + } +}; + + +class FunctionSnowflakeIDToDateTime64 : public IFunction +{ +private: + const bool 
allow_nonconst_timezone_arguments; + +public: + static constexpr auto name = "snowflakeIDToDateTime64"; + + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionSnowflakeIDToDateTime64(ContextPtr context) + : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {"value", static_cast(&isUInt64), nullptr, "UInt64"} + }; + FunctionArgumentDescriptors optional_args{ + {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, + {"time_zone", static_cast(&isString), nullptr, "String"} + }; + validateFunctionArgumentTypes(*this, arguments, args, optional_args); + + String timezone; + if (arguments.size() == 3) + timezone = extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, allow_nonconst_timezone_arguments); + + return std::make_shared(3, timezone); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & col_src = *arguments[0].column; + + UInt64 epoch = 0; + if (arguments.size() >= 2 && input_rows_count != 0) + { + const auto & col_epoch = *arguments[1].column; + epoch = col_epoch.getUInt(0); + } + + auto col_res = ColumnDateTime64::create(input_rows_count, 3); + auto & res_data = col_res->getData(); + + if (const auto * col_src_non_const = typeid_cast(&col_src)) + { + const auto & src_data = col_src_non_const->getData(); + for (size_t i = 0; i < input_rows_count; ++i) + 
res_data[i] = (src_data[i] >> time_shift) + epoch; + } + else if (const auto * col_src_const = typeid_cast(&col_src)) + { + UInt64 src_val = col_src_const->getValue(); + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = (src_val >> time_shift) + epoch; + } + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument for function {}", name); + + return col_res; + + } +}; + +REGISTER_FUNCTION(SnowflakeIDToDateTime) +{ + { + FunctionDocumentation::Description description = R"(Returns the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as a value of type [DateTime](../data-types/datetime.md).)"; + FunctionDocumentation::Syntax syntax = "snowflakeIDToDateTime(value[, epoch[, time_zone]])"; + FunctionDocumentation::Arguments arguments = { + {"value", "Snowflake ID. [UInt64](../data-types/int-uint.md)"}, + {"epoch", "Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md)"}, + {"time_zone", "[Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. 
[String](../data-types/string.md)"} + }; + FunctionDocumentation::ReturnedValue returned_value = "The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value."; + FunctionDocumentation::Examples examples = {{"simple", "SELECT snowflakeIDToDateTime(7204436857747984384)", "2024-06-06 10:59:58"}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + } + + { + FunctionDocumentation::Description description = R"(Returns the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as a value of type [DateTime64](../data-types/datetime64.md).)"; + FunctionDocumentation::Syntax syntax = "snowflakeIDToDateTime64(value[, epoch[, time_zone]])"; + FunctionDocumentation::Arguments arguments = { + {"value", "Snowflake ID. [UInt64](../data-types/int-uint.md)"}, + {"epoch", "Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md)"}, + {"time_zone", "[Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md)"} + }; + FunctionDocumentation::ReturnedValue returned_value = "The timestamp component of `value` as a [DateTime64](../data-types/datetime64.md) with scale = 3, i.e. 
millisecond precision."; + FunctionDocumentation::Examples examples = {{"simple", "SELECT snowflakeIDToDateTime64(7204436857747984384)", "2024-06-06 10:59:58"}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + } +} + +} diff --git a/src/Functions/tests/gtest_ternary_logic.cpp b/src/Functions/tests/gtest_ternary_logic.cpp deleted file mode 100644 index 5ecafabb361..00000000000 --- a/src/Functions/tests/gtest_ternary_logic.cpp +++ /dev/null @@ -1,354 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// I know that inclusion of .cpp is not good at all -#include // NOLINT - -using namespace DB; -using TernaryValues = std::vector; - -struct LinearCongruentialGenerator -{ - /// Constants from `man lrand48_r`. - static constexpr UInt64 a = 0x5DEECE66D; - static constexpr UInt64 c = 0xB; - - /// And this is from `head -c8 /dev/urandom | xxd -p` - UInt64 current = 0x09826f4a081cee35ULL; - - UInt32 next() - { - current = current * a + c; - return static_cast(current >> 16); - } -}; - -void generateRandomTernaryValue(LinearCongruentialGenerator & gen, Ternary::ResultType * output, size_t size, double false_ratio, double null_ratio) -{ - /// The LinearCongruentialGenerator generates nonnegative integers uniformly distributed over the interval [0, 2^32). - /// See https://linux.die.net/man/3/nrand48 - - double false_percentile = false_ratio; - double null_percentile = false_ratio + null_ratio; - - false_percentile = false_percentile > 1 ? 1 : false_percentile; - null_percentile = null_percentile > 1 ? 
1 : null_percentile; - - UInt32 false_threshold = static_cast(static_cast(std::numeric_limits::max()) * false_percentile); - UInt32 null_threshold = static_cast(static_cast(std::numeric_limits::max()) * null_percentile); - - for (Ternary::ResultType * end = output + size; output != end; ++output) - { - UInt32 val = gen.next(); - *output = val < false_threshold ? Ternary::False : (val < null_threshold ? Ternary::Null : Ternary::True); - } -} - -template -ColumnPtr createColumnNullable(const Ternary::ResultType * ternary_values, size_t size) -{ - auto nested_column = ColumnVector::create(size); - auto null_map = ColumnUInt8::create(size); - auto & nested_column_data = nested_column->getData(); - auto & null_map_data = null_map->getData(); - - for (size_t i = 0; i < size; ++i) - { - if (ternary_values[i] == Ternary::Null) - { - null_map_data[i] = 1; - nested_column_data[i] = 0; - } - else if (ternary_values[i] == Ternary::True) - { - null_map_data[i] = 0; - nested_column_data[i] = 100; - } - else - { - null_map_data[i] = 0; - nested_column_data[i] = 0; - } - } - - return ColumnNullable::create(std::move(nested_column), std::move(null_map)); -} - -template -ColumnPtr createColumnVector(const Ternary::ResultType * ternary_values, size_t size) -{ - auto column = ColumnVector::create(size); - auto & column_data = column->getData(); - - for (size_t i = 0; i < size; ++i) - { - if (ternary_values[i] == Ternary::True) - { - column_data[i] = 100; - } - else - { - column_data[i] = 0; - } - } - - return column; -} - -template -ColumnPtr createRandomColumn(LinearCongruentialGenerator & gen, TernaryValues & ternary_values) -{ - size_t size = ternary_values.size(); - Ternary::ResultType * ternary_data = ternary_values.data(); - - if constexpr (std::is_same_v) - { - generateRandomTernaryValue(gen, ternary_data, size, 0.3, 0.7); - return createColumnNullable(ternary_data, size); - } - else if constexpr (std::is_same_v>) - { - generateRandomTernaryValue(gen, ternary_data, size, 0.5, 
0); - return createColumnVector(ternary_data, size); - } - else - { - auto nested_col = ColumnNothing::create(size); - auto null_map = ColumnUInt8::create(size); - - memset(ternary_data, Ternary::Null, size); - - return ColumnNullable::create(std::move(nested_col), std::move(null_map)); - } -} - -/* The truth table of ternary And and Or operations: - * +-------+-------+---------+--------+ - * | a | b | a And b | a Or b | - * +-------+-------+---------+--------+ - * | False | False | False | False | - * | False | Null | False | Null | - * | False | True | False | True | - * | Null | False | False | Null | - * | Null | Null | Null | Null | - * | Null | True | Null | True | - * | True | False | False | True | - * | True | Null | Null | True | - * | True | True | True | True | - * +-------+-------+---------+--------+ - * - * https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic - */ -template -bool testTernaryLogicTruthTable() -{ - constexpr size_t size = 9; - - Ternary::ResultType col_a_ternary[] = {Ternary::False, Ternary::False, Ternary::False, Ternary::Null, Ternary::Null, Ternary::Null, Ternary::True, Ternary::True, Ternary::True}; - Ternary::ResultType col_b_ternary[] = {Ternary::False, Ternary::Null, Ternary::True, Ternary::False, Ternary::Null, Ternary::True,Ternary::False, Ternary::Null, Ternary::True}; - Ternary::ResultType and_expected_ternary[] = {Ternary::False, Ternary::False, Ternary::False, Ternary::False, Ternary::Null, Ternary::Null,Ternary::False, Ternary::Null, Ternary::True}; - Ternary::ResultType or_expected_ternary[] = {Ternary::False, Ternary::Null, Ternary::True, Ternary::Null, Ternary::Null, Ternary::True,Ternary::True, Ternary::True, Ternary::True}; - Ternary::ResultType * expected_ternary; - - - if constexpr (std::is_same_v) - { - expected_ternary = and_expected_ternary; - } - else - { - expected_ternary = or_expected_ternary; - } - - auto col_a = createColumnNullable(col_a_ternary, size); - auto col_b = 
createColumnNullable(col_b_ternary, size); - ColumnRawPtrs arguments = {col_a.get(), col_b.get()}; - - auto col_res = ColumnUInt8::create(size); - auto & col_res_data = col_res->getData(); - - OperationApplier::apply(arguments, col_res->getData(), false); - - for (size_t i = 0; i < size; ++i) - { - if (col_res_data[i] != expected_ternary[i]) return false; - } - - return true; -} - -template -bool testTernaryLogicOfTwoColumns(size_t size) -{ - LinearCongruentialGenerator gen; - - TernaryValues left_column_ternary(size); - TernaryValues right_column_ternary(size); - TernaryValues expected_ternary(size); - - ColumnPtr left = createRandomColumn(gen, left_column_ternary); - ColumnPtr right = createRandomColumn(gen, right_column_ternary); - - for (size_t i = 0; i < size; ++i) - { - /// Given that False is less than Null and Null is less than True, the And operation can be implemented - /// with std::min, and the Or operation can be implemented with std::max. - if constexpr (std::is_same_v) - { - expected_ternary[i] = std::min(left_column_ternary[i], right_column_ternary[i]); - } - else - { - expected_ternary[i] = std::max(left_column_ternary[i], right_column_ternary[i]); - } - } - - ColumnRawPtrs arguments = {left.get(), right.get()}; - - auto col_res = ColumnUInt8::create(size); - auto & col_res_data = col_res->getData(); - - OperationApplier::apply(arguments, col_res->getData(), false); - - for (size_t i = 0; i < size; ++i) - { - if (col_res_data[i] != expected_ternary[i]) return false; - } - - return true; -} - -TEST(TernaryLogicTruthTable, NestedUInt8) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedUInt16) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedUInt32) -{ - bool test_1 = 
testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedUInt64) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedInt8) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedInt16) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedInt32) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedInt64) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedFloat32) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTruthTable, NestedFloat64) -{ - bool test_1 = testTernaryLogicTruthTable(); - bool test_2 = testTernaryLogicTruthTable(); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, TwoNullable) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, TwoVector) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, 
TwoNothing) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, NullableVector) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, NullableNothing) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} - -TEST(TernaryLogicTwoColumns, VectorNothing) -{ - bool test_1 = testTernaryLogicOfTwoColumns(100 /*size*/); - bool test_2 = testTernaryLogicOfTwoColumns(100 /*size*/); - ASSERT_EQ(test_1, true); - ASSERT_EQ(test_2, true); -} diff --git a/src/Functions/tryBase64Decode.cpp b/src/Functions/tryBase64Decode.cpp index bd452b8357b..08eabe93200 100644 --- a/src/Functions/tryBase64Decode.cpp +++ b/src/Functions/tryBase64Decode.cpp @@ -7,7 +7,14 @@ namespace DB { REGISTER_FUNCTION(TryBase64Decode) { - factory.registerFunction>(); + FunctionDocumentation::Description description = R"(Decodes a String or FixedString from base64, like base64Decode but returns an empty string in case of an error.)"; + FunctionDocumentation::Syntax syntax = "tryBase64Decode(encoded)"; + FunctionDocumentation::Arguments arguments = {{"encoded", "String column or constant. 
If the string is not a valid Base64-encoded value, returns an empty string."}}; + FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument."; + FunctionDocumentation::Examples examples = {{"valid", "SELECT tryBase64Decode('Y2xpY2tob3VzZQ==')", "clickhouse"}, {"invalid", "SELECT tryBase64Decode('invalid')", ""}}; + FunctionDocumentation::Categories categories = {"String encoding"}; + + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } } diff --git a/src/Functions/tryBase64URLDecode.cpp b/src/Functions/tryBase64URLDecode.cpp new file mode 100644 index 00000000000..b44bc7538ee --- /dev/null +++ b/src/Functions/tryBase64URLDecode.cpp @@ -0,0 +1,21 @@ +#include + +#if USE_BASE64 +#include + +namespace DB +{ +REGISTER_FUNCTION(TryBase64URLDecode) +{ + FunctionDocumentation::Description description = R"(Decodes an URL from base64, like base64URLDecode but returns an empty string in case of an error.)"; + FunctionDocumentation::Syntax syntax = "tryBase64URLDecode(encodedUrl)"; + FunctionDocumentation::Arguments arguments = {{"encodedURL", "String column or constant. 
If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string."}}; + FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument."; + FunctionDocumentation::Examples examples = {{"valid", "SELECT tryBase64URLDecode('aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ')", "https://clickhouse.com"}, {"invalid", "SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja')", ""}}; + FunctionDocumentation::Categories categories = {"String encoding"}; + + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); +} +} + +#endif diff --git a/src/Functions/tupleConcat.cpp b/src/Functions/tupleConcat.cpp index c48e4d61463..c9cdae10bcf 100644 --- a/src/Functions/tupleConcat.cpp +++ b/src/Functions/tupleConcat.cpp @@ -61,7 +61,7 @@ public: return std::make_shared(tuple_arg_types); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const size_t num_arguments = arguments.size(); Columns columns; @@ -92,6 +92,9 @@ public: columns.push_back(inner_col); } + if (columns.empty()) + return ColumnTuple::create(input_rows_count); + return ColumnTuple::create(columns); } }; diff --git a/src/Functions/wkt.cpp b/src/Functions/wkt.cpp index afcfabd0bf4..678ec02d229 100644 --- a/src/Functions/wkt.cpp +++ b/src/Functions/wkt.cpp @@ -41,6 +41,14 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + /* + * Functions like recursiveRemoveLowCardinality don't pay enough attention to custom types and just erase + * the information about it during type conversions. 
+ * While it is a big problem the quick solution would be just to disable default low cardinality implementation + * because it doesn't make a lot of sense for geo types. + */ + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override { auto res_column = ColumnString::create(); diff --git a/src/IO/Archives/IArchiveReader.h b/src/IO/Archives/IArchiveReader.h index ee516d2655b..d7758b9e401 100644 --- a/src/IO/Archives/IArchiveReader.h +++ b/src/IO/Archives/IArchiveReader.h @@ -5,6 +5,7 @@ #include #include +#include namespace DB { @@ -25,6 +26,7 @@ public: { UInt64 uncompressed_size; UInt64 compressed_size; + Poco::Timestamp last_modified; bool is_encrypted; }; diff --git a/src/IO/Archives/LibArchiveReader.cpp b/src/IO/Archives/LibArchiveReader.cpp index bec7f587180..e3fe63fa40d 100644 --- a/src/IO/Archives/LibArchiveReader.cpp +++ b/src/IO/Archives/LibArchiveReader.cpp @@ -157,6 +157,7 @@ public: file_info.emplace(); file_info->uncompressed_size = archive_entry_size(current_entry); file_info->compressed_size = archive_entry_size(current_entry); + file_info->last_modified = archive_entry_mtime(current_entry); file_info->is_encrypted = false; } diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 8bd436f218c..6386c7a3c76 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -16,10 +16,12 @@ namespace ProfileEvents { extern const Event AzureCopyObject; - extern const Event AzureUploadPart; + extern const Event AzureStageBlock; + extern const Event AzureCommitBlockList; extern const Event DiskAzureCopyObject; - extern const Event DiskAzureUploadPart; + extern const Event DiskAzureStageBlock; + extern const Event DiskAzureCommitBlockList; } @@ -156,6 
+158,10 @@ namespace void completeMultipartUpload() { auto block_blob_client = client->GetBlockBlobClient(dest_blob); + ProfileEvents::increment(ProfileEvents::AzureCommitBlockList); + if (client->GetClickhouseOptions().IsClientForDisk) + ProfileEvents::increment(ProfileEvents::DiskAzureCommitBlockList); + block_blob_client.CommitBlockList(block_ids); } @@ -259,9 +265,9 @@ namespace void processUploadPartRequest(UploadPartTask & task) { - ProfileEvents::increment(ProfileEvents::AzureUploadPart); + ProfileEvents::increment(ProfileEvents::AzureStageBlock); if (client->GetClickhouseOptions().IsClientForDisk) - ProfileEvents::increment(ProfileEvents::DiskAzureUploadPart); + ProfileEvents::increment(ProfileEvents::DiskAzureStageBlock); auto block_blob_client = client->GetBlockBlobClient(dest_blob); auto read_buffer = std::make_unique(create_read_buffer(), task.part_offset, task.part_size); @@ -333,7 +339,6 @@ void copyAzureBlobStorageFile( const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule) { - if (settings->use_native_copy) { LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); diff --git a/src/IO/CascadeWriteBuffer.cpp b/src/IO/CascadeWriteBuffer.cpp index 91a42e77fdb..8b863cb253c 100644 --- a/src/IO/CascadeWriteBuffer.cpp +++ b/src/IO/CascadeWriteBuffer.cpp @@ -83,6 +83,20 @@ void CascadeWriteBuffer::finalizeImpl() } } +void CascadeWriteBuffer::cancelImpl() noexcept +{ + if (curr_buffer) + curr_buffer->position() = position(); + + for (auto & buf : prepared_sources) + { + if (buf) + { + buf->cancel(); + } + } +} + WriteBuffer * CascadeWriteBuffer::setNextBuffer() { if (first_lazy_source_num <= curr_buffer_num && curr_buffer_num < num_sources) diff --git a/src/IO/CascadeWriteBuffer.h b/src/IO/CascadeWriteBuffer.h index a003d11bd8a..7a8b11c6a87 100644 --- a/src/IO/CascadeWriteBuffer.h +++ b/src/IO/CascadeWriteBuffer.h @@ -16,7 +16,7 @@ namespace 
ErrorCodes * (lazy_sources contains not pointers themself, but their delayed constructors) * * Firtly, CascadeWriteBuffer redirects data to first buffer of the sequence - * If current WriteBuffer cannot receive data anymore, it throws special exception MemoryWriteBuffer::CurrentBufferExhausted in nextImpl() body, + * If current WriteBuffer cannot receive data anymore, it throws special exception WriteBuffer::CurrentBufferExhausted in nextImpl() body, * CascadeWriteBuffer prepare next buffer and continuously redirects data to it. * If there are no buffers anymore CascadeWriteBuffer throws an exception. * @@ -48,6 +48,7 @@ public: private: void finalizeImpl() override; + void cancelImpl() noexcept override; WriteBuffer * setNextBuffer(); diff --git a/src/IO/MemoryReadWriteBuffer.h b/src/IO/MemoryReadWriteBuffer.h index d7ca992aa44..a7d3e388cb3 100644 --- a/src/IO/MemoryReadWriteBuffer.h +++ b/src/IO/MemoryReadWriteBuffer.h @@ -16,11 +16,11 @@ namespace DB class MemoryWriteBuffer : public WriteBuffer, public IReadableWriteBuffer, boost::noncopyable, private Allocator { public: - /// Special exception to throw when the current WriteBuffer cannot receive data + /// Special exception to throw when the current MemoryWriteBuffer cannot receive data class CurrentBufferExhausted : public std::exception { public: - const char * what() const noexcept override { return "MemoryWriteBuffer limit is exhausted"; } + const char * what() const noexcept override { return "WriteBuffer limit is exhausted"; } }; /// Use max_total_size_ = 0 for unlimited storage diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 8823af55936..9e001232e65 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -51,7 +51,7 @@ ReadBufferFromS3::ReadBufferFromS3( const String & bucket_, const String & key_, const String & version_id_, - const S3Settings::RequestSettings & request_settings_, + const S3::RequestSettings & request_settings_, const ReadSettings & 
settings_, bool use_external_buffer_, size_t offset_, @@ -318,7 +318,7 @@ size_t ReadBufferFromS3::getFileSize() if (file_size) return *file_size; - auto object_size = S3::getObjectSize(*client_ptr, bucket, key, version_id, request_settings); + auto object_size = S3::getObjectSize(*client_ptr, bucket, key, version_id); file_size = object_size; return *file_size; diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index 003c88df7d2..c6625c2d632 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include "config.h" #if USE_AWS_S3 @@ -28,7 +28,7 @@ private: String bucket; String key; String version_id; - const S3Settings::RequestSettings request_settings; + const S3::RequestSettings request_settings; /// These variables are atomic because they can be used for `logging only` /// (where it is not important to get consistent result) @@ -47,7 +47,7 @@ public: const String & bucket_, const String & key_, const String & version_id_, - const S3Settings::RequestSettings & request_settings_, + const S3::RequestSettings & request_settings_, const ReadSettings & settings_, bool use_external_buffer = false, size_t offset_ = 0, diff --git a/src/IO/S3/BlobStorageLogWriter.cpp b/src/IO/S3/BlobStorageLogWriter.cpp index aaf4aea5a8e..c2f0cb86928 100644 --- a/src/IO/S3/BlobStorageLogWriter.cpp +++ b/src/IO/S3/BlobStorageLogWriter.cpp @@ -23,6 +23,9 @@ void BlobStorageLogWriter::addEvent( if (!log) return; + if (log->shouldIgnorePath(local_path_.empty() ? 
local_path : local_path_)) + return; + if (!time_now.time_since_epoch().count()) time_now = std::chrono::system_clock::now(); diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 9229342b8c1..55441cfb86b 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -30,10 +30,6 @@ #include -#ifdef ADDRESS_SANITIZER -#include -#endif - namespace ProfileEvents { extern const Event S3WriteRequestsErrors; @@ -880,14 +876,7 @@ void ClientCacheRegistry::clearCacheForAll() ClientFactory::ClientFactory() { aws_options = Aws::SDKOptions{}; - { -#ifdef ADDRESS_SANITIZER - /// Leak sanitizer (part of address sanitizer) thinks that memory in OpenSSL (called by AWS SDK) is allocated but not - /// released. Actually, the memory is released at the end of the program (ClientFactory is a singleton, see the dtor). - __lsan::ScopedDisabler lsan_disabler; -#endif - Aws::InitAPI(aws_options); - } + Aws::InitAPI(aws_options); Aws::Utils::Logging::InitializeAWSLogging(std::make_shared(false)); Aws::Http::SetHttpClientFactory(std::make_shared()); } diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index bd281846343..2fd0a9cd2d1 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -219,6 +219,9 @@ public: return client_configuration.for_disk_s3; } + ThrottlerPtr getPutRequestThrottler() const { return client_configuration.put_request_throttler; } + ThrottlerPtr getGetRequestThrottler() const { return client_configuration.get_request_throttler; } + private: friend struct ::MockS3::Client; diff --git a/src/IO/S3/Credentials.cpp b/src/IO/S3/Credentials.cpp index fa9d018eaa6..dfb7727fca4 100644 --- a/src/IO/S3/Credentials.cpp +++ b/src/IO/S3/Credentials.cpp @@ -9,6 +9,21 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } +namespace S3 +{ + std::string tryGetRunningAvailabilityZone() + { + try + { + return getRunningAvailabilityZone(); + } + catch (...) 
+ { + tryLogCurrentException("tryGetRunningAvailabilityZone"); + return ""; + } + } +} } #if USE_AWS_S3 diff --git a/src/IO/S3/Credentials.h b/src/IO/S3/Credentials.h index 8d586223035..95297ab0538 100644 --- a/src/IO/S3/Credentials.h +++ b/src/IO/S3/Credentials.h @@ -13,23 +13,18 @@ # include # include +# include namespace DB::S3 { -inline static constexpr uint64_t DEFAULT_EXPIRATION_WINDOW_SECONDS = 120; -inline static constexpr uint64_t DEFAULT_CONNECT_TIMEOUT_MS = 1000; -inline static constexpr uint64_t DEFAULT_REQUEST_TIMEOUT_MS = 30000; -inline static constexpr uint64_t DEFAULT_MAX_CONNECTIONS = 100; -inline static constexpr uint64_t DEFAULT_KEEP_ALIVE_TIMEOUT = 5; -inline static constexpr uint64_t DEFAULT_KEEP_ALIVE_MAX_REQUESTS = 100; - /// In GCP metadata service can be accessed via DNS regardless of IPv4 or IPv6. static inline constexpr char GCP_METADATA_SERVICE_ENDPOINT[] = "http://metadata.google.internal"; /// getRunningAvailabilityZone returns the availability zone of the underlying compute resources where the current process runs. 
std::string getRunningAvailabilityZone(); +std::string tryGetRunningAvailabilityZone(); class AWSEC2MetadataClient : public Aws::Internal::AWSHttpResourceClient { @@ -201,6 +196,7 @@ namespace DB namespace S3 { std::string getRunningAvailabilityZone(); +std::string tryGetRunningAvailabilityZone(); } } diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 1cef43530e0..aab7a39534d 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -535,7 +535,7 @@ void PocoHTTPClient::makeRequestInternalImpl( const static std::string_view needle = ""; if (auto it = std::search(response_string.begin(), response_string.end(), std::default_searcher(needle.begin(), needle.end())); it != response_string.end()) { - LOG_WARNING(log, "Response for request contain tag in body, settings internal server error (500 code)"); + LOG_WARNING(log, "Response for the request contains an tag in the body, will treat it as an internal server error (code 500)"); response->SetResponseCode(Aws::Http::HttpResponseCode::INTERNAL_SERVER_ERROR); addMetric(request, S3MetricType::Errors); diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index d3968d883e8..bb654c3f5c9 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -56,7 +56,7 @@ namespace const std::shared_ptr & client_ptr_, const String & dest_bucket_, const String & dest_key_, - const S3Settings::RequestSettings & request_settings_, + const S3::RequestSettings & request_settings_, const std::optional> & object_metadata_, ThreadPoolCallbackRunnerUnsafe schedule_, bool for_disk_s3_, @@ -66,7 +66,6 @@ namespace , dest_bucket(dest_bucket_) , dest_key(dest_key_) , request_settings(request_settings_) - , upload_settings(request_settings.getUploadSettings()) , object_metadata(object_metadata_) , schedule(schedule_) , for_disk_s3(for_disk_s3_) @@ -81,8 +80,7 @@ namespace std::shared_ptr client_ptr; const String & dest_bucket; const String & dest_key; - const 
S3Settings::RequestSettings & request_settings; - const S3Settings::RequestSettings::PartUploadSettings & upload_settings; + const S3::RequestSettings & request_settings; const std::optional> & object_metadata; ThreadPoolCallbackRunnerUnsafe schedule; bool for_disk_s3; @@ -127,8 +125,8 @@ namespace if (object_metadata.has_value()) request.SetMetadata(object_metadata.value()); - const auto & storage_class_name = upload_settings.storage_class_name; - if (!storage_class_name.empty()) + const auto & storage_class_name = request_settings.storage_class_name; + if (!storage_class_name.value.empty()) request.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(storage_class_name)); client_ptr->setKMSHeaders(request); @@ -187,7 +185,7 @@ namespace request.SetMultipartUpload(multipart_upload); - size_t max_retries = std::max(request_settings.max_unexpected_write_error_retries, 1UL); + size_t max_retries = std::max(request_settings.max_unexpected_write_error_retries.value, 1UL); for (size_t retries = 1;; ++retries) { ProfileEvents::increment(ProfileEvents::S3CompleteMultipartUpload); @@ -241,7 +239,7 @@ namespace void checkObjectAfterUpload() { LOG_TRACE(log, "Checking object {} exists after upload", dest_key); - S3::checkObjectExists(*client_ptr, dest_bucket, dest_key, {}, request_settings, "Immediately after upload"); + S3::checkObjectExists(*client_ptr, dest_bucket, dest_key, {}, "Immediately after upload"); LOG_TRACE(log, "Object {} exists after upload", dest_key); } @@ -292,9 +290,9 @@ namespace if (!total_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chosen multipart upload for an empty file. 
This must not happen"); - auto max_part_number = upload_settings.max_part_number; - auto min_upload_part_size = upload_settings.min_upload_part_size; - auto max_upload_part_size = upload_settings.max_upload_part_size; + auto max_part_number = request_settings.max_part_number; + auto min_upload_part_size = request_settings.min_upload_part_size; + auto max_upload_part_size = request_settings.max_upload_part_size; if (!max_part_number) throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_part_number must not be 0"); @@ -467,7 +465,7 @@ namespace const std::shared_ptr & client_ptr_, const String & dest_bucket_, const String & dest_key_, - const S3Settings::RequestSettings & request_settings_, + const S3::RequestSettings & request_settings_, const std::optional> & object_metadata_, ThreadPoolCallbackRunnerUnsafe schedule_, bool for_disk_s3_, @@ -481,7 +479,7 @@ namespace void performCopy() { - if (size <= upload_settings.max_single_part_upload_size) + if (size <= request_settings.max_single_part_upload_size) performSinglepartUpload(); else performMultipartUpload(); @@ -514,8 +512,8 @@ namespace if (object_metadata.has_value()) request.SetMetadata(object_metadata.value()); - const auto & storage_class_name = upload_settings.storage_class_name; - if (!storage_class_name.empty()) + const auto & storage_class_name = request_settings.storage_class_name; + if (!storage_class_name.value.empty()) request.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(storage_class_name)); /// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840 @@ -526,7 +524,7 @@ namespace void processPutRequest(S3::PutObjectRequest & request) { - size_t max_retries = std::max(request_settings.max_unexpected_write_error_retries, 1UL); + size_t max_retries = std::max(request_settings.max_unexpected_write_error_retries.value, 1UL); for (size_t retries = 1;; ++retries) { 
ProfileEvents::increment(ProfileEvents::S3PutObject); @@ -649,7 +647,7 @@ namespace size_t src_size_, const String & dest_bucket_, const String & dest_key_, - const S3Settings::RequestSettings & request_settings_, + const S3::RequestSettings & request_settings_, const ReadSettings & read_settings_, const std::optional> & object_metadata_, ThreadPoolCallbackRunnerUnsafe schedule_, @@ -679,7 +677,7 @@ namespace void performCopy() { LOG_TEST(log, "Copy object {} to {} using native copy", src_key, dest_key); - if (!supports_multipart_copy || size <= upload_settings.max_single_operation_copy_size) + if (!supports_multipart_copy || size <= request_settings.max_single_operation_copy_size) performSingleOperationCopy(); else performMultipartUploadCopy(); @@ -716,8 +714,8 @@ namespace request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE); } - const auto & storage_class_name = upload_settings.storage_class_name; - if (!storage_class_name.empty()) + const auto & storage_class_name = request_settings.storage_class_name; + if (!storage_class_name.value.empty()) request.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(storage_class_name)); /// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840 @@ -728,7 +726,7 @@ namespace void processCopyRequest(S3::CopyObjectRequest & request) { - size_t max_retries = std::max(request_settings.max_unexpected_write_error_retries, 1UL); + size_t max_retries = std::max(request_settings.max_unexpected_write_error_retries.value, 1UL); for (size_t retries = 1;; ++retries) { ProfileEvents::increment(ProfileEvents::S3CopyObject); @@ -852,7 +850,7 @@ void copyDataToS3File( const std::shared_ptr & dest_s3_client, const String & dest_bucket, const String & dest_key, - const S3Settings::RequestSettings & settings, + const S3::RequestSettings & settings, BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata, 
ThreadPoolCallbackRunnerUnsafe schedule, @@ -883,7 +881,7 @@ void copyS3File( std::shared_ptr dest_s3_client, const String & dest_bucket, const String & dest_key, - const S3Settings::RequestSettings & settings, + const S3::RequestSettings & settings, const ReadSettings & read_settings, BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata, diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index 85b3870ddbf..c33f55cb21b 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -4,7 +4,7 @@ #if USE_AWS_S3 -#include +#include #include #include #include @@ -39,7 +39,7 @@ void copyS3File( std::shared_ptr dest_s3_client, const String & dest_bucket, const String & dest_key, - const S3Settings::RequestSettings & settings, + const S3::RequestSettings & settings, const ReadSettings & read_settings, BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata = std::nullopt, @@ -58,7 +58,7 @@ void copyDataToS3File( const std::shared_ptr & dest_s3_client, const String & dest_bucket, const String & dest_key, - const S3Settings::RequestSettings & settings, + const S3::RequestSettings & settings, BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata = std::nullopt, ThreadPoolCallbackRunnerUnsafe schedule_ = {}, diff --git a/src/IO/S3/getObjectInfo.cpp b/src/IO/S3/getObjectInfo.cpp index 78efda4ae57..9271ad820e4 100644 --- a/src/IO/S3/getObjectInfo.cpp +++ b/src/IO/S3/getObjectInfo.cpp @@ -44,7 +44,7 @@ namespace /// Performs a request to get the size and last modification time of an object. 
std::pair, Aws::S3::S3Error> tryGetObjectInfo( const S3::Client & client, const String & bucket, const String & key, const String & version_id, - const S3Settings::RequestSettings & /*request_settings*/, bool with_metadata) + bool with_metadata) { auto outcome = headObject(client, bucket, key, version_id); if (!outcome.IsSuccess()) @@ -73,11 +73,10 @@ ObjectInfo getObjectInfo( const String & bucket, const String & key, const String & version_id, - const S3Settings::RequestSettings & request_settings, bool with_metadata, bool throw_on_error) { - auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, request_settings, with_metadata); + auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, with_metadata); if (object_info) { return *object_info; @@ -96,20 +95,18 @@ size_t getObjectSize( const String & bucket, const String & key, const String & version_id, - const S3Settings::RequestSettings & request_settings, bool throw_on_error) { - return getObjectInfo(client, bucket, key, version_id, request_settings, {}, throw_on_error).size; + return getObjectInfo(client, bucket, key, version_id, {}, throw_on_error).size; } bool objectExists( const S3::Client & client, const String & bucket, const String & key, - const String & version_id, - const S3Settings::RequestSettings & request_settings) + const String & version_id) { - auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, request_settings, {}); + auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, {}); if (object_info) return true; @@ -126,10 +123,9 @@ void checkObjectExists( const String & bucket, const String & key, const String & version_id, - const S3Settings::RequestSettings & request_settings, std::string_view description) { - auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, request_settings, {}); + auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, {}); if 
(object_info) return; throw S3Exception(error.GetErrorType(), "{}Object {} in bucket {} suddenly disappeared: {}", diff --git a/src/IO/S3/getObjectInfo.h b/src/IO/S3/getObjectInfo.h index ac8072a4338..32f34f74069 100644 --- a/src/IO/S3/getObjectInfo.h +++ b/src/IO/S3/getObjectInfo.h @@ -3,7 +3,7 @@ #include "config.h" #if USE_AWS_S3 -#include +#include #include #include @@ -24,7 +24,6 @@ ObjectInfo getObjectInfo( const String & bucket, const String & key, const String & version_id = {}, - const S3Settings::RequestSettings & request_settings = {}, bool with_metadata = false, bool throw_on_error = true); @@ -33,15 +32,13 @@ size_t getObjectSize( const String & bucket, const String & key, const String & version_id = {}, - const S3Settings::RequestSettings & request_settings = {}, bool throw_on_error = true); bool objectExists( const S3::Client & client, const String & bucket, const String & key, - const String & version_id = {}, - const S3Settings::RequestSettings & request_settings = {}); + const String & version_id = {}); /// Throws an exception if a specified object doesn't exist. `description` is used as a part of the error message. 
void checkObjectExists( @@ -49,7 +46,6 @@ void checkObjectExists( const String & bucket, const String & key, const String & version_id = {}, - const S3Settings::RequestSettings & request_settings = {}, std::string_view description = {}); bool isNotFoundError(Aws::S3::S3Errors error); diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index 0a28c578f69..5ee9648a44e 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include "TestPocoHTTPServer.h" @@ -69,7 +69,7 @@ void doReadRequest(std::shared_ptr client, const DB::S3::U UInt64 max_single_read_retries = 1; DB::ReadSettings read_settings; - DB::S3Settings::RequestSettings request_settings; + DB::S3::RequestSettings request_settings; request_settings.max_single_read_retries = max_single_read_retries; DB::ReadBufferFromS3 read_buffer( client, @@ -88,7 +88,7 @@ void doWriteRequest(std::shared_ptr client, const DB::S3:: { UInt64 max_unexpected_write_error_retries = 1; - DB::S3Settings::RequestSettings request_settings; + DB::S3::RequestSettings request_settings; request_settings.max_unexpected_write_error_retries = max_unexpected_write_error_retries; DB::WriteBufferFromS3 write_buffer( client, diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 78c51fcb29c..490bf8c2d0c 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -2,17 +2,19 @@ #include #include +#include +#include +#include +#include #include #include "config.h" #if USE_AWS_S3 -# include -# include -# include -# include -# include +#include +#include +#include namespace ProfileEvents @@ -58,6 +60,8 @@ namespace DB namespace ErrorCodes { extern const int INVALID_CONFIG_PARAMETER; + extern const int BAD_ARGUMENTS; + extern const int INVALID_SETTING_VALUE; } namespace S3 @@ -98,104 +102,320 @@ ServerSideEncryptionKMSConfig getSSEKMSConfig(const std::string & config_elem, c return 
sse_kms_config; } -AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const Poco::Util::AbstractConfiguration & config) +template +static bool setValueFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & path, + typename Settings::SettingFieldRef & field) { - auto access_key_id = config.getString(config_elem + ".access_key_id", ""); - auto secret_access_key = config.getString(config_elem + ".secret_access_key", ""); - auto session_token = config.getString(config_elem + ".session_token", ""); + if (!config.has(path)) + return false; - auto region = config.getString(config_elem + ".region", ""); - auto server_side_encryption_customer_key_base64 = config.getString(config_elem + ".server_side_encryption_customer_key_base64", ""); + auto which = field.getValue().getType(); + if (isInt64OrUInt64FieldType(which)) + field.setValue(config.getUInt64(path)); + else if (which == Field::Types::String) + field.setValue(config.getString(path)); + else if (which == Field::Types::Bool) + field.setValue(config.getBool(path)); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type: {}", field.getTypeName()); - std::optional use_environment_credentials; - if (config.has(config_elem + ".use_environment_credentials")) - use_environment_credentials = config.getBool(config_elem + ".use_environment_credentials"); + return true; +} - std::optional use_insecure_imds_request; - if (config.has(config_elem + ".use_insecure_imds_request")) - use_insecure_imds_request = config.getBool(config_elem + ".use_insecure_imds_request"); +AuthSettings::AuthSettings( + const Poco::Util::AbstractConfiguration & config, + const DB::Settings & settings, + const std::string & config_prefix) +{ + for (auto & field : allMutable()) + { + auto path = fmt::format("{}.{}", config_prefix, field.getName()); - std::optional expiration_window_seconds; - if (config.has(config_elem + ".expiration_window_seconds")) - expiration_window_seconds = 
config.getUInt64(config_elem + ".expiration_window_seconds"); + bool updated = setValueFromConfig(config, path, field); + if (!updated) + { + auto setting_name = "s3_" + field.getName(); + if (settings.has(setting_name) && settings.isChanged(setting_name)) + field.setValue(settings.get(setting_name)); + } + } - std::optional no_sign_request; - if (config.has(config_elem + ".no_sign_request")) - no_sign_request = config.getBool(config_elem + ".no_sign_request"); + headers = getHTTPHeaders(config_prefix, config); + server_side_encryption_kms_config = getSSEKMSConfig(config_prefix, config); - HTTPHeaderEntries headers = getHTTPHeaders(config_elem, config); - ServerSideEncryptionKMSConfig sse_kms_config = getSSEKMSConfig(config_elem, config); - - std::unordered_set users; Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_elem, keys); + config.keys(config_prefix, keys); for (const auto & key : keys) { if (startsWith(key, "user")) - users.insert(config.getString(config_elem + "." + key)); + users.insert(config.getString(config_prefix + "." 
+ key)); } - - return AuthSettings - { - std::move(access_key_id), std::move(secret_access_key), std::move(session_token), - std::move(region), - std::move(server_side_encryption_customer_key_base64), - std::move(sse_kms_config), - std::move(headers), - use_environment_credentials, - use_insecure_imds_request, - expiration_window_seconds, - no_sign_request, - std::move(users) - }; } -bool AuthSettings::canBeUsedByUser(const String & user) const +AuthSettings::AuthSettings(const DB::Settings & settings) { - return users.empty() || users.contains(user); + updateFromSettings(settings, /* if_changed */false); +} + +void AuthSettings::updateFromSettings(const DB::Settings & settings, bool if_changed) +{ + for (auto & field : allMutable()) + { + const auto setting_name = "s3_" + field.getName(); + if (settings.has(setting_name) && (!if_changed || settings.isChanged(setting_name))) + { + field.setValue(settings.get(setting_name)); + } + } } bool AuthSettings::hasUpdates(const AuthSettings & other) const { AuthSettings copy = *this; - copy.updateFrom(other); + copy.updateIfChanged(other); return *this != copy; } -void AuthSettings::updateFrom(const AuthSettings & from) +void AuthSettings::updateIfChanged(const AuthSettings & settings) { - /// Update with check for emptyness only parameters which - /// can be passed not only from config, but via ast. 
+ for (auto & setting : settings.all()) + { + if (setting.isValueChanged()) + set(setting.getName(), setting.getValue()); + } - if (!from.access_key_id.empty()) - access_key_id = from.access_key_id; - if (!from.secret_access_key.empty()) - secret_access_key = from.secret_access_key; - if (!from.session_token.empty()) - session_token = from.session_token; + if (!settings.headers.empty()) + headers = settings.headers; - if (!from.headers.empty()) - headers = from.headers; - if (!from.region.empty()) - region = from.region; + if (!settings.users.empty()) + users.insert(settings.users.begin(), settings.users.end()); - server_side_encryption_customer_key_base64 = from.server_side_encryption_customer_key_base64; - server_side_encryption_kms_config = from.server_side_encryption_kms_config; - - if (from.use_environment_credentials.has_value()) - use_environment_credentials = from.use_environment_credentials; - - if (from.use_insecure_imds_request.has_value()) - use_insecure_imds_request = from.use_insecure_imds_request; - - if (from.expiration_window_seconds.has_value()) - expiration_window_seconds = from.expiration_window_seconds; - - if (from.no_sign_request.has_value()) - no_sign_request = from.no_sign_request; - - users.insert(from.users.begin(), from.users.end()); + if (settings.server_side_encryption_kms_config.key_id.has_value() + || settings.server_side_encryption_kms_config.encryption_context.has_value() + || settings.server_side_encryption_kms_config.key_id.has_value()) + server_side_encryption_kms_config = settings.server_side_encryption_kms_config; } +RequestSettings::RequestSettings( + const Poco::Util::AbstractConfiguration & config, + const DB::Settings & settings, + const std::string & config_prefix, + const std::string & setting_name_prefix, + bool validate_settings) +{ + for (auto & field : allMutable()) + { + auto path = fmt::format("{}.{}{}", config_prefix, setting_name_prefix, field.getName()); + + bool updated = setValueFromConfig(config, path, 
field); + if (!updated) + { + auto setting_name = "s3_" + field.getName(); + if (settings.has(setting_name) && settings.isChanged(setting_name)) + field.setValue(settings.get(setting_name)); + } + } + finishInit(settings, validate_settings); +} + +RequestSettings::RequestSettings( + const NamedCollection & collection, + const DB::Settings & settings, + bool validate_settings) +{ + auto values = allMutable(); + for (auto & field : values) + { + const auto path = field.getName(); + if (collection.has(path)) + { + auto which = field.getValue().getType(); + if (isInt64OrUInt64FieldType(which)) + field.setValue(collection.get(path)); + else if (which == Field::Types::String) + field.setValue(collection.get(path)); + else if (which == Field::Types::Bool) + field.setValue(collection.get(path)); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type: {}", field.getTypeName()); + } + } + finishInit(settings, validate_settings); +} + +RequestSettings::RequestSettings(const DB::Settings & settings, bool validate_settings) +{ + updateFromSettings(settings, /* if_changed */false, validate_settings); + finishInit(settings, validate_settings); +} + +void RequestSettings::updateFromSettings( + const DB::Settings & settings, bool if_changed, bool validate_settings) +{ + for (auto & field : allMutable()) + { + const auto setting_name = "s3_" + field.getName(); + if (settings.has(setting_name) && (!if_changed || settings.isChanged(setting_name))) + { + set(field.getName(), settings.get(setting_name)); + } + } + + normalizeSettings(); + if (validate_settings) + validateUploadSettings(); +} + +void RequestSettings::updateIfChanged(const RequestSettings & settings) +{ + for (auto & setting : settings.all()) + { + if (setting.isValueChanged()) + set(setting.getName(), setting.getValue()); + } +} + +void RequestSettings::normalizeSettings() +{ + if (!storage_class_name.value.empty() && storage_class_name.changed) + storage_class_name = 
Poco::toUpperInPlace(storage_class_name.value); +} + +void RequestSettings::finishInit(const DB::Settings & settings, bool validate_settings) +{ + normalizeSettings(); + if (validate_settings) + validateUploadSettings(); + + /// NOTE: it would be better to reuse old throttlers + /// to avoid losing token bucket state on every config reload, + /// which could lead to exceeding limit for short time. + /// But it is good enough unless very high `burst` values are used. + if (UInt64 max_get_rps = isChanged("max_get_rps") ? get("max_get_rps").get() : settings.s3_max_get_rps) + { + size_t default_max_get_burst = settings.s3_max_get_burst + ? settings.s3_max_get_burst + : (Throttler::default_burst_seconds * max_get_rps); + + size_t max_get_burst = isChanged("max_get_burts") ? get("max_get_burst").get() : default_max_get_burst; + get_request_throttler = std::make_shared(max_get_rps, max_get_burst); + } + if (UInt64 max_put_rps = isChanged("max_put_rps") ? get("max_put_rps").get() : settings.s3_max_put_rps) + { + size_t default_max_put_burst = settings.s3_max_put_burst + ? settings.s3_max_put_burst + : (Throttler::default_burst_seconds * max_put_rps); + size_t max_put_burst = isChanged("max_put_burts") ? 
get("max_put_burst").get() : default_max_put_burst; + put_request_throttler = std::make_shared(max_put_rps, max_put_burst); + } +} + +void RequestSettings::validateUploadSettings() +{ + static constexpr size_t min_upload_part_size_limit = 5 * 1024 * 1024; + if (strict_upload_part_size && strict_upload_part_size < min_upload_part_size_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting strict_upload_part_size has invalid value {} which is less than the s3 API limit {}", + ReadableSize(strict_upload_part_size), ReadableSize(min_upload_part_size_limit)); + + if (min_upload_part_size < min_upload_part_size_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting min_upload_part_size has invalid value {} which is less than the s3 API limit {}", + ReadableSize(min_upload_part_size), ReadableSize(min_upload_part_size_limit)); + + static constexpr size_t max_upload_part_size_limit = 5ull * 1024 * 1024 * 1024; + if (max_upload_part_size > max_upload_part_size_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_upload_part_size has invalid value {} which is greater than the s3 API limit {}", + ReadableSize(max_upload_part_size), ReadableSize(max_upload_part_size_limit)); + + if (max_single_part_upload_size > max_upload_part_size_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_single_part_upload_size has invalid value {} which is grater than the s3 API limit {}", + ReadableSize(max_single_part_upload_size), ReadableSize(max_upload_part_size_limit)); + + if (max_single_operation_copy_size > max_upload_part_size_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_single_operation_copy_size has invalid value {} which is grater than the s3 API limit {}", + ReadableSize(max_single_operation_copy_size), ReadableSize(max_upload_part_size_limit)); + + if (max_upload_part_size < min_upload_part_size) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting 
max_upload_part_size ({}) can't be less than setting min_upload_part_size {}", + ReadableSize(max_upload_part_size), ReadableSize(min_upload_part_size)); + + if (!upload_part_size_multiply_factor) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting upload_part_size_multiply_factor cannot be zero"); + + if (!upload_part_size_multiply_parts_count_threshold) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting upload_part_size_multiply_parts_count_threshold cannot be zero"); + + if (!max_part_number) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_part_number cannot be zero"); + + static constexpr size_t max_part_number_limit = 10000; + if (max_part_number > max_part_number_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_part_number has invalid value {} which is grater than the s3 API limit {}", + ReadableSize(max_part_number), ReadableSize(max_part_number_limit)); + + size_t maybe_overflow; + if (common::mulOverflow(max_upload_part_size.value, upload_part_size_multiply_factor.value, maybe_overflow)) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting upload_part_size_multiply_factor is too big ({}). " + "Multiplication to max_upload_part_size ({}) will cause integer overflow", + ReadableSize(max_part_number), ReadableSize(max_part_number_limit)); + + std::unordered_set storage_class_names {"STANDARD", "INTELLIGENT_TIERING"}; + if (!storage_class_name.value.empty() && !storage_class_names.contains(storage_class_name)) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting storage_class has invalid value {} which only supports STANDARD and INTELLIGENT_TIERING", + storage_class_name.value); + + /// TODO: it's possible to set too small limits. + /// We can check that max possible object size is not too small. 
+} + +bool operator==(const AuthSettings & left, const AuthSettings & right) +{ + if (left.headers != right.headers) + return false; + + if (left.users != right.users) + return false; + + if (left.server_side_encryption_kms_config != right.server_side_encryption_kms_config) + return false; + + auto l = left.begin(); + for (const auto & r : right) + { + if ((l == left.end()) || (*l != r)) + return false; + ++l; + } + return l == left.end(); } } + +IMPLEMENT_SETTINGS_TRAITS(S3::AuthSettingsTraits, CLIENT_SETTINGS_LIST) +IMPLEMENT_SETTINGS_TRAITS(S3::RequestSettingsTraits, REQUEST_SETTINGS_LIST) + +} diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index b3e01bd6132..2dca08871d3 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -3,22 +3,22 @@ #include #include #include - -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include "config.h" #if USE_AWS_S3 -#include -#include -#include - #include #include - #include #include @@ -30,8 +30,6 @@ namespace ErrorCodes extern const int S3_ERROR; } -class RemoteHostFilter; - class S3Exception : public Exception { public: @@ -68,40 +66,140 @@ namespace Poco::Util class AbstractConfiguration; }; -namespace DB::S3 +namespace DB { +class NamedCollection; +struct ProxyConfigurationResolver; + +namespace S3 +{ +/// We use s3 settings for DiskS3, StorageS3 (StorageS3Cluster, S3Queue, etc), BackupIO_S3, etc. +/// 1. For DiskS3 we usually have configuration in disk section in configuration file. +/// REQUEST_SETTINGS, PART_UPLOAD_SETTINGS start with "s3_" prefix there, while AUTH_SETTINGS and CLIENT_SETTINGS do not +/// (does not make sense, but it happened this way). +/// If some setting is absent from disk configuration, we look up for it in the "s3." server config section, +/// where s3 settings no longer have "s3_" prefix like in disk configuration section. 
+/// If the settings is absent there as well, we look up for it in Users config (where query/session settings are also updated). +/// 2. For StorageS3 and similar - we look up to "s3." config section (again - settings there do not have "s3_" prefix). +/// If some setting is absent from there, we lool up for it in Users config. + +#define AUTH_SETTINGS(M, ALIAS) \ + M(String, access_key_id, "", "", 0) \ + M(String, secret_access_key, "", "", 0) \ + M(String, session_token, "", "", 0) \ + M(String, region, "", "", 0) \ + M(String, server_side_encryption_customer_key_base64, "", "", 0) \ + +#define CLIENT_SETTINGS(M, ALIAS) \ + M(UInt64, connect_timeout_ms, DEFAULT_CONNECT_TIMEOUT_MS, "", 0) \ + M(UInt64, request_timeout_ms, DEFAULT_REQUEST_TIMEOUT_MS, "", 0) \ + M(UInt64, max_connections, DEFAULT_MAX_CONNECTIONS, "", 0) \ + M(UInt64, http_keep_alive_timeout, DEFAULT_KEEP_ALIVE_TIMEOUT, "", 0) \ + M(UInt64, http_keep_alive_max_requests, DEFAULT_KEEP_ALIVE_MAX_REQUESTS, "", 0) \ + M(UInt64, expiration_window_seconds, DEFAULT_EXPIRATION_WINDOW_SECONDS, "", 0) \ + M(Bool, use_environment_credentials, DEFAULT_USE_ENVIRONMENT_CREDENTIALS, "", 0) \ + M(Bool, no_sign_request, DEFAULT_NO_SIGN_REQUEST, "", 0) \ + M(Bool, use_insecure_imds_request, false, "", 0) \ + M(Bool, use_adaptive_timeouts, DEFAULT_USE_ADAPTIVE_TIMEOUTS, "", 0) \ + M(Bool, is_virtual_hosted_style, false, "", 0) \ + M(Bool, disable_checksum, DEFAULT_DISABLE_CHECKSUM, "", 0) \ + M(Bool, gcs_issue_compose_request, false, "", 0) \ + +#define REQUEST_SETTINGS(M, ALIAS) \ + M(UInt64, max_single_read_retries, 4, "", 0) \ + M(UInt64, request_timeout_ms, DEFAULT_REQUEST_TIMEOUT_MS, "", 0) \ + M(UInt64, list_object_keys_size, DEFAULT_LIST_OBJECT_KEYS_SIZE, "", 0) \ + M(Bool, allow_native_copy, DEFAULT_ALLOW_NATIVE_COPY, "", 0) \ + M(Bool, check_objects_after_upload, DEFAULT_CHECK_OBJECTS_AFTER_UPLOAD, "", 0) \ + M(Bool, throw_on_zero_files_match, false, "", 0) \ + M(UInt64, max_single_operation_copy_size, 
DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE, "", 0) \ + M(String, storage_class_name, "", "", 0) \ + +#define PART_UPLOAD_SETTINGS(M, ALIAS) \ + M(UInt64, strict_upload_part_size, 0, "", 0) \ + M(UInt64, min_upload_part_size, DEFAULT_MIN_UPLOAD_PART_SIZE, "", 0) \ + M(UInt64, max_upload_part_size, DEFAULT_MAX_UPLOAD_PART_SIZE, "", 0) \ + M(UInt64, upload_part_size_multiply_factor, DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR, "", 0) \ + M(UInt64, upload_part_size_multiply_parts_count_threshold, DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD, "", 0) \ + M(UInt64, max_inflight_parts_for_one_file, DEFAULT_MAX_INFLIGHT_PARTS_FOR_ONE_FILE, "", 0) \ + M(UInt64, max_part_number, DEFAULT_MAX_PART_NUMBER, "", 0) \ + M(UInt64, max_single_part_upload_size, DEFAULT_MAX_SINGLE_PART_UPLOAD_SIZE, "", 0) \ + M(UInt64, max_unexpected_write_error_retries, 4, "", 0) \ + +#define CLIENT_SETTINGS_LIST(M, ALIAS) \ + CLIENT_SETTINGS(M, ALIAS) \ + AUTH_SETTINGS(M, ALIAS) + +#define REQUEST_SETTINGS_LIST(M, ALIAS) \ + REQUEST_SETTINGS(M, ALIAS) \ + PART_UPLOAD_SETTINGS(M, ALIAS) + +DECLARE_SETTINGS_TRAITS(AuthSettingsTraits, CLIENT_SETTINGS_LIST) +DECLARE_SETTINGS_TRAITS(RequestSettingsTraits, REQUEST_SETTINGS_LIST) + +struct AuthSettings : public BaseSettings +{ + AuthSettings() = default; + + AuthSettings( + const Poco::Util::AbstractConfiguration & config, + const DB::Settings & settings, + const std::string & config_prefix); + + explicit AuthSettings(const DB::Settings & settings); + + explicit AuthSettings(const DB::NamedCollection & collection); + + void updateFromSettings(const DB::Settings & settings, bool if_changed); + bool hasUpdates(const AuthSettings & other) const; + void updateIfChanged(const AuthSettings & settings); + bool canBeUsedByUser(const String & user) const { return users.empty() || users.contains(user); } + + HTTPHeaderEntries headers; + std::unordered_set users; + ServerSideEncryptionKMSConfig server_side_encryption_kms_config; + /// Note: if you add any field, 
do not forget to update operator ==. +}; + +bool operator==(const AuthSettings & left, const AuthSettings & right); + +struct RequestSettings : public BaseSettings +{ + RequestSettings() = default; + + /// Create request settings from Config. + RequestSettings( + const Poco::Util::AbstractConfiguration & config, + const DB::Settings & settings, + const std::string & config_prefix, + const std::string & setting_name_prefix = "", + bool validate_settings = true); + + /// Create request settings from DB::Settings. + explicit RequestSettings(const DB::Settings & settings, bool validate_settings = true); + + /// Create request settings from NamedCollection. + RequestSettings( + const NamedCollection & collection, + const DB::Settings & settings, + bool validate_settings = true); + + void updateFromSettings(const DB::Settings & settings, bool if_changed, bool validate_settings = true); + void updateIfChanged(const RequestSettings & settings); + void validateUploadSettings(); + + ThrottlerPtr get_request_throttler; + ThrottlerPtr put_request_throttler; + std::shared_ptr proxy_resolver; + +private: + void finishInit(const DB::Settings & settings, bool validate_settings); + void normalizeSettings(); +}; HTTPHeaderEntries getHTTPHeaders(const std::string & config_elem, const Poco::Util::AbstractConfiguration & config); ServerSideEncryptionKMSConfig getSSEKMSConfig(const std::string & config_elem, const Poco::Util::AbstractConfiguration & config); -struct AuthSettings -{ - static AuthSettings loadFromConfig(const std::string & config_elem, const Poco::Util::AbstractConfiguration & config); - - std::string access_key_id; - std::string secret_access_key; - std::string session_token; - std::string region; - std::string server_side_encryption_customer_key_base64; - ServerSideEncryptionKMSConfig server_side_encryption_kms_config; - - HTTPHeaderEntries headers; - - std::optional use_environment_credentials; - std::optional use_insecure_imds_request; - std::optional 
expiration_window_seconds; - std::optional no_sign_request; - - std::unordered_set users; - - bool hasUpdates(const AuthSettings & other) const; - void updateFrom(const AuthSettings & from); - - bool canBeUsedByUser(const String & user) const; - -private: - bool operator==(const AuthSettings & other) const = default; -}; - +} } diff --git a/src/IO/S3Defines.h b/src/IO/S3Defines.h new file mode 100644 index 00000000000..332ebcfea92 --- /dev/null +++ b/src/IO/S3Defines.h @@ -0,0 +1,41 @@ +#pragma once +#include + +namespace DB::S3 +{ + +/// Client settings. +inline static constexpr uint64_t DEFAULT_EXPIRATION_WINDOW_SECONDS = 120; +inline static constexpr uint64_t DEFAULT_CONNECT_TIMEOUT_MS = 1000; +inline static constexpr uint64_t DEFAULT_REQUEST_TIMEOUT_MS = 30000; +inline static constexpr uint64_t DEFAULT_MAX_CONNECTIONS = 1024; +inline static constexpr uint64_t DEFAULT_KEEP_ALIVE_TIMEOUT = 5; +inline static constexpr uint64_t DEFAULT_KEEP_ALIVE_MAX_REQUESTS = 100; + +inline static constexpr bool DEFAULT_USE_ENVIRONMENT_CREDENTIALS = true; +inline static constexpr bool DEFAULT_NO_SIGN_REQUEST = false; +inline static constexpr bool DEFAULT_DISABLE_CHECKSUM = false; +inline static constexpr bool DEFAULT_USE_ADAPTIVE_TIMEOUTS = true; + +/// Upload settings. +inline static constexpr uint64_t DEFAULT_MIN_UPLOAD_PART_SIZE = 16 * 1024 * 1024; +inline static constexpr uint64_t DEFAULT_MAX_UPLOAD_PART_SIZE = 5ull * 1024 * 1024 * 1024; +inline static constexpr uint64_t DEFAULT_MAX_SINGLE_PART_UPLOAD_SIZE = 32 * 1024 * 1024; +inline static constexpr uint64_t DEFAULT_STRICT_UPLOAD_PART_SIZE = 0; +inline static constexpr uint64_t DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR = 2; +inline static constexpr uint64_t DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD = 500; +inline static constexpr uint64_t DEFAULT_MAX_PART_NUMBER = 10000; + +/// Other settings. 
+inline static constexpr uint64_t DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE = 32 * 1024 * 1024; +inline static constexpr uint64_t DEFAULT_MAX_INFLIGHT_PARTS_FOR_ONE_FILE = 20; +inline static constexpr uint64_t DEFAULT_LIST_OBJECT_KEYS_SIZE = 1000; +inline static constexpr uint64_t DEFAULT_MAX_SINGLE_READ_TRIES = 4; +inline static constexpr uint64_t DEFAULT_MAX_UNEXPECTED_WRITE_ERROR_RETRIES = 4; +inline static constexpr uint64_t DEFAULT_MAX_REDIRECTS = 10; +inline static constexpr uint64_t DEFAULT_RETRY_ATTEMPTS = 100; + +inline static constexpr bool DEFAULT_ALLOW_NATIVE_COPY = true; +inline static constexpr bool DEFAULT_CHECK_OBJECTS_AFTER_UPLOAD = false; + +} diff --git a/src/IO/S3Settings.cpp b/src/IO/S3Settings.cpp new file mode 100644 index 00000000000..a5a50c873cb --- /dev/null +++ b/src/IO/S3Settings.cpp @@ -0,0 +1,80 @@ +#include + +#include +#include +#include + + +namespace DB +{ + +void S3Settings::loadFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const DB::Settings & settings) +{ + auth_settings = S3::AuthSettings(config, settings, config_prefix); + request_settings = S3::RequestSettings(config, settings, config_prefix); +} + +void S3Settings::updateIfChanged(const S3Settings & settings) +{ + auth_settings.updateIfChanged(settings.auth_settings); + request_settings.updateIfChanged(settings.request_settings); +} + +void S3SettingsByEndpoint::loadFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const DB::Settings & settings) +{ + std::lock_guard lock(mutex); + s3_settings.clear(); + if (!config.has(config_prefix)) + return; + + Poco::Util::AbstractConfiguration::Keys config_keys; + config.keys(config_prefix, config_keys); + auto default_auth_settings = S3::AuthSettings(config, settings, config_prefix); + auto default_request_settings = S3::RequestSettings(config, settings, config_prefix); + + for (const String & key : config_keys) + { + const auto 
key_path = config_prefix + "." + key; + const auto endpoint_path = key_path + ".endpoint"; + if (config.has(endpoint_path)) + { + auto auth_settings{default_auth_settings}; + auth_settings.updateIfChanged(S3::AuthSettings(config, settings, key_path)); + + auto request_settings{default_request_settings}; + request_settings.updateIfChanged(S3::RequestSettings(config, settings, key_path, "", settings.s3_validate_request_settings)); + + s3_settings.emplace( + config.getString(endpoint_path), + S3Settings{std::move(auth_settings), std::move(request_settings)}); + } + } +} + +std::optional S3SettingsByEndpoint::getSettings( + const String & endpoint, + const String & user, + bool ignore_user) const +{ + std::lock_guard lock(mutex); + auto next_prefix_setting = s3_settings.upper_bound(endpoint); + + /// Linear time algorithm may be replaced with logarithmic with prefix tree map. + for (auto possible_prefix_setting = next_prefix_setting; possible_prefix_setting != s3_settings.begin();) + { + std::advance(possible_prefix_setting, -1); + const auto & [endpoint_prefix, settings] = *possible_prefix_setting; + if (endpoint.starts_with(endpoint_prefix) && (ignore_user || settings.auth_settings.canBeUsedByUser(user))) + return possible_prefix_setting->second; + } + + return {}; +} + +} diff --git a/src/IO/S3Settings.h b/src/IO/S3Settings.h new file mode 100644 index 00000000000..9eed0a5652f --- /dev/null +++ b/src/IO/S3Settings.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace Poco::Util { class AbstractConfiguration; } + +namespace DB +{ + +struct Settings; + +struct S3Settings +{ + S3::AuthSettings auth_settings; + S3::RequestSettings request_settings; + + void loadFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const DB::Settings & settings); + + void updateIfChanged(const S3Settings & settings); +}; + +class S3SettingsByEndpoint +{ +public: + 
void loadFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const DB::Settings & settings); + + std::optional getSettings( + const std::string & endpoint, + const std::string & user, + bool ignore_user = false) const; + +private: + mutable std::mutex mutex; + std::map s3_settings; +}; + + +} diff --git a/src/IO/WriteBuffer.cpp b/src/IO/WriteBuffer.cpp index bcc7445486e..a86eb4ccea2 100644 --- a/src/IO/WriteBuffer.cpp +++ b/src/IO/WriteBuffer.cpp @@ -11,7 +11,7 @@ namespace DB WriteBuffer::~WriteBuffer() { // That destructor could be call with finalized=false in case of exceptions - if (count() > 0 && !finalized) + if (count() > 0 && !finalized && !canceled) { /// It is totally OK to destroy instance without finalization when an exception occurs /// However it is suspicious to destroy instance without finalization at the green path @@ -20,7 +20,7 @@ WriteBuffer::~WriteBuffer() LoggerPtr log = getLogger("WriteBuffer"); LOG_ERROR( log, - "WriteBuffer is not finalized when destructor is called. " + "WriteBuffer is neither finalized nor canceled when destructor is called. " "No exceptions in flight are detected. " "The file might not be written at all or might be truncated. 
" "Stack trace: {}", @@ -30,4 +30,13 @@ WriteBuffer::~WriteBuffer() } } +void WriteBuffer::cancel() noexcept +{ + if (canceled || finalized) + return; + + LockMemoryExceptionInThread lock(VariableContext::Global); + cancelImpl(); + canceled = true; +} } diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h index ef4e0058ec3..4759f96a235 100644 --- a/src/IO/WriteBuffer.h +++ b/src/IO/WriteBuffer.h @@ -59,6 +59,7 @@ public: */ pos = working_buffer.begin(); bytes += bytes_in_buffer; + throw; } @@ -75,7 +76,6 @@ public: next(); } - void write(const char * from, size_t n) { if (finalized) @@ -121,6 +121,9 @@ public: if (finalized) return; + if (canceled) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot finalize buffer after cancellation."); + LockMemoryExceptionInThread lock(VariableContext::Global); try { @@ -130,11 +133,15 @@ public: catch (...) { pos = working_buffer.begin(); - finalized = true; + + cancel(); + throw; } } + void cancel() noexcept; + /// Wait for data to be reliably written. Mainly, call fsync for fd. /// May be called after finalize() if needed. virtual void sync() @@ -150,7 +157,12 @@ protected: next(); } + virtual void cancelImpl() noexcept + { + } + bool finalized = false; + bool canceled = false; private: /** Write the data in the buffer (from the beginning of the buffer to the current position). 
diff --git a/src/IO/WriteBufferDecorator.h b/src/IO/WriteBufferDecorator.h index 88161f8d232..109c2bd24e4 100644 --- a/src/IO/WriteBufferDecorator.h +++ b/src/IO/WriteBufferDecorator.h @@ -47,6 +47,11 @@ public: } } + void cancelImpl() noexcept override + { + out->cancel(); + } + WriteBuffer * getNestedBuffer() { return out; } protected: diff --git a/src/IO/WriteBufferFromFile.cpp b/src/IO/WriteBufferFromFile.cpp index 0ca6c26f08c..37b1161356f 100644 --- a/src/IO/WriteBufferFromFile.cpp +++ b/src/IO/WriteBufferFromFile.cpp @@ -77,7 +77,16 @@ WriteBufferFromFile::~WriteBufferFromFile() if (fd < 0) return; - finalize(); + try + { + if (!canceled) + finalize(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + int err = ::close(fd); /// Everything except for EBADF should be ignored in dtor, since all of /// others (EINTR/EIO/ENOSPC/EDQUOT) could be possible during writing to @@ -103,7 +112,8 @@ void WriteBufferFromFile::close() if (fd < 0) return; - finalize(); + if (!canceled) + finalize(); if (0 != ::close(fd)) throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file"); diff --git a/src/IO/WriteBufferFromFileDecorator.cpp b/src/IO/WriteBufferFromFileDecorator.cpp index 0e4e5e13a86..b1e7d843d92 100644 --- a/src/IO/WriteBufferFromFileDecorator.cpp +++ b/src/IO/WriteBufferFromFileDecorator.cpp @@ -28,6 +28,12 @@ void WriteBufferFromFileDecorator::finalizeImpl() } } +void WriteBufferFromFileDecorator::cancelImpl() noexcept +{ + SwapHelper swap(*this, *impl); + impl->cancel(); +} + WriteBufferFromFileDecorator::~WriteBufferFromFileDecorator() { /// It is not a mistake that swap is called here diff --git a/src/IO/WriteBufferFromFileDecorator.h b/src/IO/WriteBufferFromFileDecorator.h index 5344bb1425c..07f843986bb 100644 --- a/src/IO/WriteBufferFromFileDecorator.h +++ b/src/IO/WriteBufferFromFileDecorator.h @@ -24,6 +24,8 @@ public: protected: void finalizeImpl() override; + void cancelImpl() noexcept override; + std::unique_ptr impl; 
private: diff --git a/src/IO/WriteBufferFromFileDescriptor.cpp b/src/IO/WriteBufferFromFileDescriptor.cpp index 813ef0deab9..f1207edc55b 100644 --- a/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/src/IO/WriteBufferFromFileDescriptor.cpp @@ -105,7 +105,15 @@ WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor( WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor() { - finalize(); + try + { + if (!canceled) + finalize(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } } void WriteBufferFromFileDescriptor::finalizeImpl() diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp index 10d9fd131cd..5ed4dbdc787 100644 --- a/src/IO/WriteBufferFromPocoSocket.cpp +++ b/src/IO/WriteBufferFromPocoSocket.cpp @@ -197,7 +197,8 @@ WriteBufferFromPocoSocket::~WriteBufferFromPocoSocket() { try { - finalize(); + if (!canceled) + finalize(); } catch (...) { diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index b796c029051..3682e49b018 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -72,7 +72,7 @@ struct WriteBufferFromS3::PartData } }; -BufferAllocationPolicyPtr createBufferAllocationPolicy(const S3Settings::RequestSettings::PartUploadSettings & settings) +BufferAllocationPolicyPtr createBufferAllocationPolicy(const S3::RequestSettings & settings) { BufferAllocationPolicy::Settings allocation_settings; allocation_settings.strict_size = settings.strict_upload_part_size; @@ -91,7 +91,7 @@ WriteBufferFromS3::WriteBufferFromS3( const String & bucket_, const String & key_, size_t buf_size_, - const S3Settings::RequestSettings & request_settings_, + const S3::RequestSettings & request_settings_, BlobStorageLogWriterPtr blob_log_, std::optional> object_metadata_, ThreadPoolCallbackRunnerUnsafe schedule_, @@ -100,15 +100,14 @@ WriteBufferFromS3::WriteBufferFromS3( , bucket(bucket_) , key(key_) , request_settings(request_settings_) - , 
upload_settings(request_settings.getUploadSettings()) , write_settings(write_settings_) , client_ptr(std::move(client_ptr_)) , object_metadata(std::move(object_metadata_)) - , buffer_allocation_policy(createBufferAllocationPolicy(upload_settings)) + , buffer_allocation_policy(createBufferAllocationPolicy(request_settings)) , task_tracker( std::make_unique( std::move(schedule_), - upload_settings.max_inflight_parts_for_one_file, + request_settings.max_inflight_parts_for_one_file, limitedLog)) , blob_log(std::move(blob_log_)) { @@ -165,7 +164,7 @@ void WriteBufferFromS3::preFinalize() if (multipart_upload_id.empty() && detached_part_data.size() <= 1) { - if (detached_part_data.empty() || detached_part_data.front().data_size <= upload_settings.max_single_part_upload_size) + if (detached_part_data.empty() || detached_part_data.front().data_size <= request_settings.max_single_part_upload_size) do_single_part_upload = true; } @@ -214,9 +213,9 @@ void WriteBufferFromS3::finalizeImpl() if (request_settings.check_objects_after_upload) { - S3::checkObjectExists(*client_ptr, bucket, key, {}, request_settings, "Immediately after upload"); + S3::checkObjectExists(*client_ptr, bucket, key, {}, "Immediately after upload"); - size_t actual_size = S3::getObjectSize(*client_ptr, bucket, key, {}, request_settings); + size_t actual_size = S3::getObjectSize(*client_ptr, bucket, key, {}); if (actual_size != total_size) throw Exception( ErrorCodes::S3_ERROR, @@ -225,6 +224,11 @@ void WriteBufferFromS3::finalizeImpl() } } +void WriteBufferFromS3::cancelImpl() noexcept +{ + tryToAbortMultipartUpload(); +} + String WriteBufferFromS3::getVerboseLogDetails() const { String multipart_upload_details; @@ -247,7 +251,7 @@ String WriteBufferFromS3::getShortLogDetails() const bucket, key, multipart_upload_details); } -void WriteBufferFromS3::tryToAbortMultipartUpload() +void WriteBufferFromS3::tryToAbortMultipartUpload() noexcept { try { @@ -265,8 +269,19 @@ WriteBufferFromS3::~WriteBufferFromS3() 
{ LOG_TRACE(limitedLog, "Close WriteBufferFromS3. {}.", getShortLogDetails()); + if (canceled) + { + LOG_INFO( + log, + "WriteBufferFromS3 was canceled." + "The file might not be written to S3. " + "{}.", + getVerboseLogDetails()); + return; + } + /// That destructor could be call with finalized=false in case of exceptions - if (!finalized) + if (!finalized && !canceled) { LOG_INFO( log, @@ -505,18 +520,18 @@ void WriteBufferFromS3::writePart(WriteBufferFromS3::PartData && data) "Unable to write a part without multipart_upload_id, details: WriteBufferFromS3 created for bucket {}, key {}", bucket, key); - if (part_number > upload_settings.max_part_number) + if (part_number > request_settings.max_part_number) { throw Exception( ErrorCodes::INVALID_CONFIG_PARAMETER, "Part number exceeded {} while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, " "upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, max_single_part_upload_size = {}", - upload_settings.max_part_number, count(), upload_settings.min_upload_part_size, upload_settings.max_upload_part_size, - upload_settings.upload_part_size_multiply_factor, upload_settings.upload_part_size_multiply_parts_count_threshold, - upload_settings.max_single_part_upload_size); + request_settings.max_part_number, count(), request_settings.min_upload_part_size, request_settings.max_upload_part_size, + request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold, + request_settings.max_single_part_upload_size); } - if (data.data_size > upload_settings.max_upload_part_size) + if (data.data_size > request_settings.max_upload_part_size) { throw Exception( ErrorCodes::LOGICAL_ERROR, @@ -524,7 +539,7 @@ void WriteBufferFromS3::writePart(WriteBufferFromS3::PartData && data) getShortLogDetails(), part_number, data.data_size, - upload_settings.max_upload_part_size + request_settings.max_upload_part_size ); } @@ 
-611,7 +626,7 @@ void WriteBufferFromS3::completeMultipartUpload() req.SetMultipartUpload(multipart_upload); - size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries, 1UL); + size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries.value, 1UL); for (size_t i = 0; i < max_retry; ++i) { ProfileEvents::increment(ProfileEvents::S3CompleteMultipartUpload); @@ -669,8 +684,8 @@ S3::PutObjectRequest WriteBufferFromS3::getPutRequest(PartData & data) req.SetBody(data.createAwsBuffer()); if (object_metadata.has_value()) req.SetMetadata(object_metadata.value()); - if (!upload_settings.storage_class_name.empty()) - req.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(upload_settings.storage_class_name)); + if (!request_settings.storage_class_name.value.empty()) + req.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(request_settings.storage_class_name)); /// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840 req.SetContentType("binary/octet-stream"); @@ -694,7 +709,7 @@ void WriteBufferFromS3::makeSinglepartUpload(WriteBufferFromS3::PartData && data auto & request = std::get<0>(*worker_data); size_t content_length = request.GetContentLength(); - size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries, 1UL); + size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries.value, 1UL); for (size_t i = 0; i < max_retry; ++i) { ProfileEvents::increment(ProfileEvents::S3PutObject); diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index fbfec3588fa..b026da607c5 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include @@ -38,7 +38,7 @@ public: const String & bucket_, const String & key_, size_t buf_size_, - const S3Settings::RequestSettings & 
request_settings_, + const S3::RequestSettings & request_settings_, BlobStorageLogWriterPtr blob_log_, std::optional> object_metadata_ = std::nullopt, ThreadPoolCallbackRunnerUnsafe schedule_ = {}, @@ -54,6 +54,8 @@ private: /// Receives response from the server after sending all data. void finalizeImpl() override; + void cancelImpl() noexcept override; + String getVerboseLogDetails() const; String getShortLogDetails() const; @@ -71,15 +73,14 @@ private: void createMultipartUpload(); void completeMultipartUpload(); void abortMultipartUpload(); - void tryToAbortMultipartUpload(); + void tryToAbortMultipartUpload() noexcept; S3::PutObjectRequest getPutRequest(PartData & data); void makeSinglepartUpload(PartData && data); const String bucket; const String key; - const S3Settings::RequestSettings request_settings; - const S3Settings::RequestSettings::PartUploadSettings & upload_settings; + const S3::RequestSettings request_settings; const WriteSettings write_settings; const std::shared_ptr client_ptr; const std::optional> object_metadata; diff --git a/src/IO/WriteBufferFromVector.h b/src/IO/WriteBufferFromVector.h index 1ea32af2968..17a329d401d 100644 --- a/src/IO/WriteBufferFromVector.h +++ b/src/IO/WriteBufferFromVector.h @@ -63,7 +63,8 @@ public: ~WriteBufferFromVector() override { - finalize(); + if (!canceled) + finalize(); } private: diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index d4b2d8ea0dc..6b0de441e94 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -1420,7 +1420,7 @@ struct fmt::formatter } template - auto format(const DB::UUID & uuid, FormatContext & context) + auto format(const DB::UUID & uuid, FormatContext & context) const { return fmt::format_to(context.out(), "{}", toString(uuid)); } diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index 3a21d7201a9..215bb1a3270 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -320,11 +320,13 @@ static inline void readUIntTextUpToNSignificantDigits(T & x, 
ReadBuffer & buf) template -ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) +ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in, bool & has_fractional) { static_assert(std::is_same_v || std::is_same_v, "Argument for readFloatTextImpl must be float or double"); static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII"); + has_fractional = false; + static constexpr bool throw_exception = std::is_same_v; bool negative = false; @@ -377,6 +379,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) if (checkChar('.', in)) { + has_fractional = true; auto after_point_count = in.count(); while (!in.eof() && *in.position() == '0') @@ -394,6 +397,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) { if (checkChar('e', in) || checkChar('E', in)) { + has_fractional = true; if (in.eof()) { if constexpr (throw_exception) @@ -420,10 +424,14 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) } if (after_point) + { x += static_cast(shift10(after_point, after_point_exponent)); + } if (exponent) + { x = static_cast(shift10(x, exponent)); + } if (negative) x = -x; @@ -590,8 +598,16 @@ ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf) template void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextPreciseImpl(x, in); } template bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return readFloatTextPreciseImpl(x, in); } -template void readFloatTextFast(T & x, ReadBuffer & in) { readFloatTextFastImpl(x, in); } -template bool tryReadFloatTextFast(T & x, ReadBuffer & in) { return readFloatTextFastImpl(x, in); } +template void readFloatTextFast(T & x, ReadBuffer & in) +{ + bool has_fractional; + readFloatTextFastImpl(x, in, has_fractional); +} +template bool tryReadFloatTextFast(T & x, ReadBuffer & in) +{ + bool has_fractional; + return readFloatTextFastImpl(x, in, has_fractional); +} template void readFloatTextSimple(T & x, ReadBuffer & in) { 
readFloatTextSimpleImpl(x, in); } template bool tryReadFloatTextSimple(T & x, ReadBuffer & in) { return readFloatTextSimpleImpl(x, in); } @@ -603,6 +619,21 @@ template void readFloatText(T & x, ReadBuffer & in) { readFloatText template bool tryReadFloatText(T & x, ReadBuffer & in) { return tryReadFloatTextFast(x, in); } /// Don't read exponent part of the number. -template bool tryReadFloatTextNoExponent(T & x, ReadBuffer & in) { return readFloatTextFastImpl(x, in); } +template bool tryReadFloatTextNoExponent(T & x, ReadBuffer & in) +{ + bool has_fractional; + return readFloatTextFastImpl(x, in, has_fractional); +} + +/// With a @has_fractional flag +/// Used for input_format_try_infer_integers +template bool tryReadFloatTextExt(T & x, ReadBuffer & in, bool & has_fractional) +{ + return readFloatTextFastImpl(x, in, has_fractional); +} +template bool tryReadFloatTextExtNoExponent(T & x, ReadBuffer & in, bool & has_fractional) +{ + return readFloatTextFastImpl(x, in, has_fractional); +} } diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index 4a4d7cc0fc2..3c1af6538ad 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -546,8 +546,8 @@ public: std::unique_ptr getWriteBuffer(String file_name = "file") { - S3Settings::RequestSettings request_settings; - request_settings.updateFromSettingsIfChanged(settings); + S3::RequestSettings request_settings; + request_settings.updateFromSettings(settings, /* if_changed */true, /* validate_settings */false); client->resetCounters(); diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp index a137404a669..ac3b549a576 100644 --- a/src/Interpreters/Access/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -118,7 +118,7 @@ namespace /// Checks if the current user has enough access rights granted with grant option to grant or revoke specified access rights. 
void checkGrantOption( const AccessControl & access_control, - const ContextAccess & current_user_access, + const ContextAccessWrapper & current_user_access, const std::vector & grantees_from_query, bool & need_check_grantees_are_allowed, const AccessRightsElements & elements_to_grant, @@ -200,7 +200,7 @@ namespace /// Checks if the current user has enough roles granted with admin option to grant or revoke specified roles. void checkAdminOption( const AccessControl & access_control, - const ContextAccess & current_user_access, + const ContextAccessWrapper & current_user_access, const std::vector & grantees_from_query, bool & need_check_grantees_are_allowed, const std::vector & roles_to_grant, @@ -277,7 +277,7 @@ namespace /// This function is less accurate than checkAdminOption() because it cannot use any information about /// granted roles the grantees currently have (due to those grantees are located on multiple nodes, /// we just don't have the full information about them). - void checkAdminOptionForExecutingOnCluster(const ContextAccess & current_user_access, + void checkAdminOptionForExecutingOnCluster(const ContextAccessWrapper & current_user_access, const std::vector roles_to_grant, const RolesOrUsersSet & roles_to_revoke) { @@ -376,7 +376,7 @@ namespace /// Calculates all available rights to grant with current user intersection. void calculateCurrentGrantRightsWithIntersection( AccessRights & rights, - std::shared_ptr current_user_access, + std::shared_ptr current_user_access, const AccessRightsElements & elements_to_grant) { AccessRightsElements current_user_grantable_elements; @@ -438,6 +438,12 @@ BlockIO InterpreterGrantQuery::execute() RolesOrUsersSet roles_to_revoke; collectRolesToGrantOrRevoke(access_control, query, roles_to_grant, roles_to_revoke); + /// Replacing empty database with the default. This step must be done before replication to avoid privilege escalation. 
+ String current_database = getContext()->getCurrentDatabase(); + elements_to_grant.replaceEmptyDatabase(current_database); + elements_to_revoke.replaceEmptyDatabase(current_database); + query.access_rights_elements.replaceEmptyDatabase(current_database); + /// Executing on cluster. if (!query.cluster.empty()) { @@ -453,9 +459,6 @@ BlockIO InterpreterGrantQuery::execute() } /// Check if the current user has corresponding access rights granted with grant option. - String current_database = getContext()->getCurrentDatabase(); - elements_to_grant.replaceEmptyDatabase(current_database); - elements_to_revoke.replaceEmptyDatabase(current_database); bool need_check_grantees_are_allowed = true; if (!query.current_grants) checkGrantOption(access_control, *current_user_access, grantees, need_check_grantees_are_allowed, elements_to_grant, elements_to_revoke); diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index cfccc835d29..34f3e0a98bd 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -596,6 +596,34 @@ void ActionsDAG::removeUnusedActions(const std::unordered_set & us std::erase_if(inputs, [&](const Node * node) { return !visited_nodes.contains(node); }); } + +void ActionsDAG::removeAliasesForFilter(const std::string & filter_name) +{ + const auto & filter_node = findInOutputs(filter_name); + std::stack stack; + stack.push(const_cast(&filter_node)); + + std::unordered_set visited; + visited.insert(stack.top()); + + while (!stack.empty()) + { + auto * node = stack.top(); + stack.pop(); + for (auto & child : node->children) + { + while (child->type == ActionType::ALIAS) + child = child->children.front(); + + if (!visited.contains(child)) + { + stack.push(const_cast(child)); + visited.insert(child); + } + } + } +} + ActionsDAGPtr ActionsDAG::cloneSubDAG(const NodeRawConstPtrs & outputs, bool remove_aliases) { auto actions = std::make_shared(); @@ -758,9 +786,6 @@ Block ActionsDAG::updateHeader(const Block & 
header) const for (auto & col : result_columns) res.insert(std::move(col)); - if (isInputProjected()) - return res; - res.reserve(header.columns() - pos_to_remove.size()); for (size_t i = 0; i < header.columns(); i++) { @@ -1122,8 +1147,33 @@ void ActionsDAG::project(const NamesWithAliases & projection) } removeUnusedActions(); - projectInput(); - projected_output = true; +} + +void ActionsDAG::appendInputsForUnusedColumns(const Block & sample_block) +{ + std::unordered_map> names_map; + size_t num_columns = sample_block.columns(); + for (size_t pos = 0; pos < num_columns; ++pos) + names_map[sample_block.getByPosition(pos).name].push_back(pos); + + for (const auto * input : inputs) + { + auto & positions = names_map[input->result_name]; + if (positions.empty()) + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, + "Not found column {} in block {}", input->result_name, sample_block.dumpStructure()); + + positions.pop_front(); + } + + for (const auto & [_, positions] : names_map) + { + for (auto pos : positions) + { + const auto & col = sample_block.getByPosition(pos); + addInput(col.name, col.type); + } + } } bool ActionsDAG::tryRestoreColumn(const std::string & column_name) @@ -1199,8 +1249,6 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name) ActionsDAGPtr ActionsDAG::clone() const { auto actions = std::make_shared(); - actions->project_input = project_input; - actions->projected_output = projected_output; std::unordered_map copy_map; @@ -1294,9 +1342,6 @@ std::string ActionsDAG::dumpDAG() const out << ' ' << map[node]; out << '\n'; - out << "Project input: " << project_input << '\n'; - out << "Projected output: " << projected_output << '\n'; - return out.str(); } @@ -1381,7 +1426,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( FunctionOverloadResolverPtr func_builder_materialize = std::make_unique(std::make_shared()); - std::map> inputs; + std::unordered_map> inputs; if (mode == MatchColumnsMode::Name) { size_t input_nodes_size = 
actions_dag->inputs.size(); @@ -1497,8 +1542,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( } actions_dag->outputs.swap(projection); - actions_dag->removeUnusedActions(); - actions_dag->projectInput(); + actions_dag->removeUnusedActions(false); return actions_dag; } @@ -1556,10 +1600,6 @@ void ActionsDAG::mergeInplace(ActionsDAG && second) auto it = first_result.find(input_node->result_name); if (it == first_result.end() || it->second.empty()) { - if (first.project_input) - throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, - "Cannot find column {} in ActionsDAG result", input_node->result_name); - first.inputs.push_back(input_node); } else @@ -1595,13 +1635,6 @@ void ActionsDAG::mergeInplace(ActionsDAG && second) } } - /// Update output nodes. - if (second.project_input) - { - first.outputs.swap(second.outputs); - first.project_input = true; - } - else { /// Add not removed result from first actions. for (const auto * output_node : first.outputs) @@ -1617,8 +1650,6 @@ void ActionsDAG::mergeInplace(ActionsDAG && second) } first.nodes.splice(first.nodes.end(), std::move(second.nodes)); - - first.projected_output = second.projected_output; } void ActionsDAG::mergeNodes(ActionsDAG && second, NodeRawConstPtrs * out_outputs) @@ -1704,7 +1735,7 @@ void ActionsDAG::mergeNodes(ActionsDAG && second, NodeRawConstPtrs * out_outputs } } -ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split_nodes, bool create_split_nodes_mapping) const +ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split_nodes, bool create_split_nodes_mapping, bool avoid_duplicate_inputs) const { /// Split DAG into two parts. /// (first_nodes, first_outputs) is a part which will have split_list in result. @@ -1718,6 +1749,14 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split /// List of nodes from current actions which are not inputs, but will be in second part. NodeRawConstPtrs new_inputs; + /// Avoid new inputs to have the same name as existing inputs. 
+ /// It's allowed for DAG but may break Block invariant 'columns with identical name must have identical structure'. + std::unordered_set duplicate_inputs; + size_t duplicate_counter = 0; + if (avoid_duplicate_inputs) + for (const auto * input : inputs) + duplicate_inputs.insert(input->result_name); + struct Frame { const Node * node = nullptr; @@ -1830,7 +1869,8 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split input_node.result_name = child->result_name; child_data.to_second = &second_nodes.emplace_back(std::move(input_node)); - new_inputs.push_back(child); + if (child->type != ActionType::INPUT) + new_inputs.push_back(child); } } @@ -1886,7 +1926,32 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split for (const auto * input : new_inputs) { - const auto & cur = data[input]; + auto & cur = data[input]; + + if (avoid_duplicate_inputs) + { + bool is_name_updated = false; + while (!duplicate_inputs.insert(cur.to_first->result_name).second) + { + is_name_updated = true; + cur.to_first->result_name = fmt::format("{}_{}", input->result_name, duplicate_counter); + ++duplicate_counter; + } + + if (is_name_updated) + { + Node input_node; + input_node.type = ActionType::INPUT; + input_node.result_type = cur.to_first->result_type; + input_node.result_name = cur.to_first->result_name; + + auto * new_input = &second_nodes.emplace_back(std::move(input_node)); + cur.to_second->type = ActionType::ALIAS; + cur.to_second->children = {new_input}; + cur.to_second = new_input; + } + } + second_inputs.push_back(cur.to_second); first_outputs.push_back(cur.to_first); } @@ -1980,7 +2045,6 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & } auto res = split(split_nodes); - res.second->project_input = project_input; return res; } @@ -2024,7 +2088,6 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBySortingDescription(const NameS dumpDAG()); auto res = split(split_nodes); - res.second->project_input = project_input; 
return res; } @@ -2096,7 +2159,6 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & co std::unordered_set split_nodes = {node}; auto res = split(split_nodes); - res.second->project_input = project_input; return res; } @@ -2683,11 +2745,7 @@ void ActionsDAG::removeUnusedConjunctions(NodeRawConstPtrs rejected_conjunctions std::unordered_set used_inputs; for (const auto * input : inputs) - { - if (removes_filter && input == predicate) - continue; used_inputs.insert(input); - } removeUnusedActions(used_inputs); } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 8c0e3f0e576..c9974fd849c 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -103,13 +103,11 @@ private: NodeRawConstPtrs inputs; NodeRawConstPtrs outputs; - bool project_input = false; - bool projected_output = false; - public: ActionsDAG() = default; ActionsDAG(ActionsDAG &&) = default; ActionsDAG(const ActionsDAG &) = delete; + ActionsDAG & operator=(ActionsDAG &&) = default; ActionsDAG & operator=(const ActionsDAG &) = delete; explicit ActionsDAG(const NamesAndTypesList & inputs_); explicit ActionsDAG(const ColumnsWithTypeAndName & inputs_); @@ -168,9 +166,12 @@ public: /// Call addAlias several times. void addAliases(const NamesWithAliases & aliases); - /// Add alias actions and remove unused columns from outputs. Also specify result columns order in outputs. + /// Add alias actions. Also specify result columns order in outputs. void project(const NamesWithAliases & projection); + /// Add input for every column from sample_block which is not mapped to existing input. + void appendInputsForUnusedColumns(const Block & sample_block); + /// If column is not in outputs, try to find it in nodes and insert back into outputs. bool tryRestoreColumn(const std::string & column_name); @@ -179,10 +180,6 @@ public: /// Return true if column was removed from inputs. 
bool removeUnusedResult(const std::string & column_name); - void projectInput(bool project = true) { project_input = project; } - bool isInputProjected() const { return project_input; } - bool isOutputProjected() const { return projected_output; } - /// Remove actions that are not needed to compute output nodes void removeUnusedActions(bool allow_remove_inputs = true, bool allow_constant_folding = true); @@ -195,6 +192,8 @@ public: /// Remove actions that are not needed to compute output nodes with required names void removeUnusedActions(const NameSet & required_names, bool allow_remove_inputs = true, bool allow_constant_folding = true); + void removeAliasesForFilter(const std::string & filter_name); + /// Transform the current DAG in a way that leaf nodes get folded into their parents. It's done /// because each projection can provide some columns as inputs to substitute certain sub-DAGs /// (expressions). Consider the following example: @@ -343,7 +342,7 @@ public: /// initial DAG : (a, b, c, d, e) -> (w, x, y, z) | 1 a 2 b 3 c 4 d 5 e 6 -> 1 2 3 4 5 6 w x y z /// split (first) : (a, c, d) -> (i, j, k, w, y) | 1 a 2 b 3 c 4 d 5 e 6 -> 1 2 b 3 4 5 e 6 i j k w y /// split (second) : (i, j, k, y, b, e) -> (x, y, z) | 1 2 b 3 4 5 e 6 i j k w y -> 1 2 3 4 5 6 w x y z - SplitResult split(std::unordered_set split_nodes, bool create_split_nodes_mapping = false) const; + SplitResult split(std::unordered_set split_nodes, bool create_split_nodes_mapping = false, bool avoid_duplicate_inputs = false) const; /// Splits actions into two parts. Returned first half may be swapped with ARRAY JOIN. SplitResult splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const; @@ -508,4 +507,15 @@ struct ActionDAGNodes ActionsDAG::NodeRawConstPtrs nodes; }; +/// Helper for query analysis. +/// If project_input is set, all columns not found in inputs should be removed. +/// Now, we do it before adding a step to query plan by calling appendInputsForUnusedColumns. 
+struct ActionsAndProjectInputsFlag +{ + ActionsDAG dag; + bool project_input = false; +}; + +using ActionsAndProjectInputsFlagPtr = std::shared_ptr; + } diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 0bdd4c089f1..c3285d73145 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -102,7 +103,7 @@ static size_t getTypeDepth(const DataTypePtr & type) /// 33.33 in the set is converted to 33.3, but it is not equal to 33.3 in the column, so the result should still be empty. /// We can not include values that don't represent any possible value from the type of filtered column to the set. template -static Block createBlockFromCollection(const Collection & collection, const DataTypes & types, bool transform_null_in) +static Block createBlockFromCollection(const Collection & collection, const DataTypes & value_types, const DataTypes & types, bool transform_null_in) { size_t columns_num = types.size(); MutableColumns columns(columns_num); @@ -113,11 +114,12 @@ static Block createBlockFromCollection(const Collection & collection, const Data } Row tuple_values; - for (const auto & value : collection) + for (size_t collection_index = 0; collection_index < collection.size(); ++collection_index) { + const auto& value = collection[collection_index]; if (columns_num == 1) { - auto field = convertFieldToTypeStrict(value, *types[0]); + auto field = convertFieldToTypeStrict(value, *value_types[collection_index], *types[0]); bool need_insert_null = transform_null_in && types[0]->isNullable(); if (field && (!field->isNull() || need_insert_null)) columns[0]->insert(*field); @@ -130,7 +132,6 @@ static Block createBlockFromCollection(const Collection & collection, const Data const auto & tuple = value.template get(); size_t tuple_size = tuple.size(); - if (tuple_size != columns_num) throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, 
"Incorrect size of tuple in set: {} instead of {}", tuple_size, columns_num); @@ -138,10 +139,13 @@ static Block createBlockFromCollection(const Collection & collection, const Data if (tuple_values.empty()) tuple_values.resize(tuple_size); + const DataTypePtr & value_type = value_types[collection_index]; + const DataTypes & tuple_value_type = typeid_cast(value_type.get())->getElements(); + size_t i = 0; for (; i < tuple_size; ++i) { - auto converted_field = convertFieldToTypeStrict(tuple[i], *types[i]); + auto converted_field = convertFieldToTypeStrict(tuple[i], *tuple_value_type[i], *types[i]); if (!converted_field) break; tuple_values[i] = std::move(*converted_field); @@ -317,16 +321,25 @@ Block createBlockForSet( if (left_type_depth == right_type_depth) { Array array{right_arg_value}; - block = createBlockFromCollection(array, set_element_types, tranform_null_in); + DataTypes value_types{right_arg_type}; + block = createBlockFromCollection(array, value_types, set_element_types, tranform_null_in); } /// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)); etc. 
else if (left_type_depth + 1 == right_type_depth) { auto type_index = right_arg_type->getTypeId(); if (type_index == TypeIndex::Tuple) - block = createBlockFromCollection(right_arg_value.get(), set_element_types, tranform_null_in); + { + const DataTypes & value_types = assert_cast(right_arg_type.get())->getElements(); + block = createBlockFromCollection(right_arg_value.get(), value_types, set_element_types, tranform_null_in); + } else if (type_index == TypeIndex::Array) - block = createBlockFromCollection(right_arg_value.get(), set_element_types, tranform_null_in); + { + const auto* right_arg_array_type = assert_cast(right_arg_type.get()); + size_t right_arg_array_size = right_arg_value.get().size(); + DataTypes value_types(right_arg_array_size, right_arg_array_type->getNestedType()); + block = createBlockFromCollection(right_arg_value.get(), value_types, set_element_types, tranform_null_in); + } else throw_unsupported_type(right_arg_type); } @@ -392,6 +405,9 @@ Block createBlockForSet( } +ScopeStack::Level::Level() = default; +ScopeStack::Level::~Level() = default; +ScopeStack::Level::Level(Level &&) noexcept = default; FutureSetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, ContextPtr context, PreparedSets & prepared_sets) @@ -486,16 +502,12 @@ public: } }; -ScopeStack::Level::~Level() = default; -ScopeStack::Level::Level() = default; -ScopeStack::Level::Level(Level &&) noexcept = default; - ActionsMatcher::Data::Data( ContextPtr context_, SizeLimits set_size_limit_, size_t subquery_depth_, std::reference_wrapper source_columns_, - ActionsDAGPtr actions_dag, + ActionsDAG actions_dag, PreparedSetsPtr prepared_sets_, bool no_subqueries_, bool no_makeset_, @@ -531,13 +543,13 @@ std::vector ActionsMatcher::Data::getAllColumnNames() const return index.getAllNames(); } -ScopeStack::ScopeStack(ActionsDAGPtr actions_dag, ContextPtr context_) : WithContext(context_) +ScopeStack::ScopeStack(ActionsDAG actions_dag, ContextPtr context_) : 
WithContext(context_) { auto & level = stack.emplace_back(); level.actions_dag = std::move(actions_dag); - level.index = std::make_unique(level.actions_dag->getOutputs()); + level.index = std::make_unique(level.actions_dag.getOutputs()); - for (const auto & node : level.actions_dag->getOutputs()) + for (const auto & node : level.actions_dag.getOutputs()) if (node->type == ActionsDAG::ActionType::INPUT) level.inputs.emplace(node->result_name); } @@ -545,22 +557,21 @@ ScopeStack::ScopeStack(ActionsDAGPtr actions_dag, ContextPtr context_) : WithCon void ScopeStack::pushLevel(const NamesAndTypesList & input_columns) { auto & level = stack.emplace_back(); - level.actions_dag = std::make_shared(); - level.index = std::make_unique(level.actions_dag->getOutputs()); + level.index = std::make_unique(level.actions_dag.getOutputs()); const auto & prev = stack[stack.size() - 2]; for (const auto & input_column : input_columns) { - const auto & node = level.actions_dag->addInput(input_column.name, input_column.type); + const auto & node = level.actions_dag.addInput(input_column.name, input_column.type); level.index->addNode(&node); level.inputs.emplace(input_column.name); } - for (const auto & node : prev.actions_dag->getOutputs()) + for (const auto & node : prev.actions_dag.getOutputs()) { if (!level.index->contains(node->result_name)) { - const auto & input = level.actions_dag->addInput({node->column, node->result_type, node->result_name}); + const auto & input = level.actions_dag.addInput({node->column, node->result_type, node->result_name}); level.index->addNode(&input); } } @@ -585,12 +596,12 @@ size_t ScopeStack::getColumnLevel(const std::string & name) void ScopeStack::addColumn(ColumnWithTypeAndName column) { - const auto & node = stack[0].actions_dag->addColumn(std::move(column)); + const auto & node = stack[0].actions_dag.addColumn(std::move(column)); stack[0].index->addNode(&node); for (size_t j = 1; j < stack.size(); ++j) { - const auto & input = 
stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name}); + const auto & input = stack[j].actions_dag.addInput({node.column, node.result_type, node.result_name}); stack[j].index->addNode(&input); } } @@ -599,12 +610,12 @@ void ScopeStack::addAlias(const std::string & name, std::string alias) { auto level = getColumnLevel(name); const auto & source = stack[level].index->getNode(name); - const auto & node = stack[level].actions_dag->addAlias(source, std::move(alias)); + const auto & node = stack[level].actions_dag.addAlias(source, std::move(alias)); stack[level].index->addNode(&node); for (size_t j = level + 1; j < stack.size(); ++j) { - const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name}); + const auto & input = stack[j].actions_dag.addInput({node.column, node.result_type, node.result_name}); stack[j].index->addNode(&input); } } @@ -618,12 +629,12 @@ void ScopeStack::addArrayJoin(const std::string & source_name, std::string resul throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression with arrayJoin cannot depend on lambda argument: {}", source_name); - const auto & node = stack.front().actions_dag->addArrayJoin(*source_node, std::move(result_name)); + const auto & node = stack.front().actions_dag.addArrayJoin(*source_node, std::move(result_name)); stack.front().index->addNode(&node); for (size_t j = 1; j < stack.size(); ++j) { - const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name}); + const auto & input = stack[j].actions_dag.addInput({node.column, node.result_type, node.result_name}); stack[j].index->addNode(&input); } } @@ -642,17 +653,17 @@ void ScopeStack::addFunction( for (const auto & argument : argument_names) children.push_back(&stack[level].index->getNode(argument)); - const auto & node = stack[level].actions_dag->addFunction(function, std::move(children), std::move(result_name)); + const auto & node = 
stack[level].actions_dag.addFunction(function, std::move(children), std::move(result_name)); stack[level].index->addNode(&node); for (size_t j = level + 1; j < stack.size(); ++j) { - const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name}); + const auto & input = stack[j].actions_dag.addInput({node.column, node.result_type, node.result_name}); stack[j].index->addNode(&input); } } -ActionsDAGPtr ScopeStack::popLevel() +ActionsDAG ScopeStack::popLevel() { auto res = std::move(stack.back().actions_dag); stack.pop_back(); @@ -661,12 +672,12 @@ ActionsDAGPtr ScopeStack::popLevel() std::string ScopeStack::dumpNames() const { - return stack.back().actions_dag->dumpNames(); + return stack.back().actions_dag.dumpNames(); } const ActionsDAG & ScopeStack::getLastActions() const { - return *stack.back().actions_dag; + return stack.back().actions_dag; } const ScopeStack::Index & ScopeStack::getLastActionsIndex() const @@ -989,7 +1000,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data.set_size_limit, data.subquery_depth, data.source_columns, - std::make_shared(data.source_columns), + ActionsDAG(data.source_columns), data.prepared_sets, data.no_subqueries, data.no_makeset, @@ -1008,10 +1019,10 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & } auto dag = index_hint_data.getActions(); - dag->project(args); + dag.project(args); auto index_hint = std::make_shared(); - index_hint->setActions(std::move(dag)); + index_hint->setActions(std::make_shared(std::move(dag))); // Arguments are removed. We add function instead of constant column to avoid constant folding. 
data.addFunction(std::make_unique(index_hint), {}, column_name); @@ -1271,10 +1282,10 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & auto lambda_dag = data.actions_stack.popLevel(); String result_name = lambda->arguments->children.at(1)->getColumnName(); - lambda_dag->removeUnusedActions(Names(1, result_name)); + lambda_dag.removeUnusedActions(Names(1, result_name)); auto lambda_actions = std::make_shared( - lambda_dag, + std::make_shared(std::move(lambda_dag)), ExpressionActionsSettings::fromContext(data.getContext(), CompileExpressions::yes)); DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type; diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 046c7387ee8..46d2d60e461 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -9,6 +10,7 @@ #include #include #include +#include namespace DB { @@ -43,20 +45,20 @@ struct ScopeStack : WithContext struct Level { - ActionsDAGPtr actions_dag; + ActionsDAG actions_dag; IndexPtr index; NameSet inputs; + ~Level(); Level(); Level(Level &&) noexcept; - ~Level(); }; - using Levels = std::vector; + using Levels = std::deque; Levels stack; - ScopeStack(ActionsDAGPtr actions_dag, ContextPtr context_); + ScopeStack(ActionsDAG actions_dag, ContextPtr context_); void pushLevel(const NamesAndTypesList & input_columns); @@ -67,7 +69,7 @@ struct ScopeStack : WithContext void addArrayJoin(const std::string & source_name, std::string result_name); void addFunction(const FunctionOverloadResolverPtr & function, const Names & argument_names, std::string result_name); - ActionsDAGPtr popLevel(); + ActionsDAG popLevel(); const ActionsDAG & getLastActions() const; const Index & getLastActionsIndex() const; @@ -147,7 +149,7 @@ public: SizeLimits set_size_limit_, size_t subquery_depth_, std::reference_wrapper source_columns_, - ActionsDAGPtr 
actions_dag, + ActionsDAG actions_dag, PreparedSetsPtr prepared_sets_, bool no_subqueries_, bool no_makeset_, @@ -182,7 +184,7 @@ public: actions_stack.addFunction(function, argument_names, std::move(result_name)); } - ActionsDAGPtr getActions() + ActionsDAG getActions() { return actions_stack.popLevel(); } diff --git a/src/Interpreters/AggregationCommon.h b/src/Interpreters/AggregationCommon.h index ab078d1c5e5..43c80d361d1 100644 --- a/src/Interpreters/AggregationCommon.h +++ b/src/Interpreters/AggregationCommon.h @@ -90,10 +90,7 @@ void fillFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const S /// Note: here we violate strict aliasing. /// It should be ok as log as we do not reffer to any value from `out` before filling. const char * source = static_cast(column)->getRawDataBegin(); - size_t offset_to = offset; - if constexpr (std::endian::native == std::endian::big) - offset_to = sizeof(Key) - sizeof(T) - offset; - T * dest = reinterpret_cast(reinterpret_cast(out.data()) + offset_to); + T * dest = reinterpret_cast(reinterpret_cast(out.data()) + offset); fillFixedBatch(num_rows, reinterpret_cast(source), dest); /// NOLINT(bugprone-sizeof-expression) offset += sizeof(T); } diff --git a/src/Interpreters/AggregationMethod.cpp b/src/Interpreters/AggregationMethod.cpp index 3ff4f0cae43..0fc789528b8 100644 --- a/src/Interpreters/AggregationMethod.cpp +++ b/src/Interpreters/AggregationMethod.cpp @@ -160,10 +160,7 @@ void AggregationMethodKeysFixedinsertData(reinterpret_cast(&key) + offset_to, size); + observed_column->insertData(reinterpret_cast(&key) + pos, size); pos += size; } } diff --git a/src/Interpreters/AsynchronousMetricLog.h b/src/Interpreters/AsynchronousMetricLog.h index 739b2aa5b56..2ce1d929592 100644 --- a/src/Interpreters/AsynchronousMetricLog.h +++ b/src/Interpreters/AsynchronousMetricLog.h @@ -8,8 +8,6 @@ #include #include -#include -#include #include diff --git a/src/Interpreters/BlobStorageLog.cpp 
b/src/Interpreters/BlobStorageLog.cpp index 0324ef8713c..f20ac9165ac 100644 --- a/src/Interpreters/BlobStorageLog.cpp +++ b/src/Interpreters/BlobStorageLog.cpp @@ -9,6 +9,8 @@ #include #include +#include +#include namespace DB { @@ -69,4 +71,32 @@ void BlobStorageLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(error_message); } +void BlobStorageLog::addSettingsForQuery(ContextMutablePtr & mutable_context, IAST::QueryKind query_kind) const +{ + SystemLog::addSettingsForQuery(mutable_context, query_kind); + + if (query_kind == IAST::QueryKind::Insert) + mutable_context->setSetting("enable_blob_storage_log", false); +} + +static std::string_view normalizePath(std::string_view path) +{ + if (path.starts_with("./")) + path.remove_prefix(2); + if (path.ends_with("/")) + path.remove_suffix(1); + return path; +} + +void BlobStorageLog::prepareTable() +{ + SystemLog::prepareTable(); + if (auto merge_tree_table = std::dynamic_pointer_cast(getStorage())) + { + std::unique_lock lock{prepare_mutex}; + const auto & relative_data_path = merge_tree_table->getRelativeDataPath(); + prefix_to_ignore = normalizePath(relative_data_path); + } +} + } diff --git a/src/Interpreters/BlobStorageLog.h b/src/Interpreters/BlobStorageLog.h index 15e15be4f87..cf8f37299f7 100644 --- a/src/Interpreters/BlobStorageLog.h +++ b/src/Interpreters/BlobStorageLog.h @@ -1,11 +1,14 @@ #pragma once -#include -#include -#include -#include -#include #include +#include + +#include + +#include +#include +#include +#include namespace DB { @@ -51,7 +54,23 @@ struct BlobStorageLogElement class BlobStorageLog : public SystemLog { +public: using SystemLog::SystemLog; + + /// We should not log events for table itself to avoid infinite recursion + bool shouldIgnorePath(const String & path) const + { + std::shared_lock lock{prepare_mutex}; + return !prefix_to_ignore.empty() && path.starts_with(prefix_to_ignore); + } + +protected: + void prepareTable() override; + void 
addSettingsForQuery(ContextMutablePtr & mutable_context, IAST::QueryKind query_kind) const override; + +private: + mutable std::shared_mutex prepare_mutex; + String prefix_to_ignore; }; } diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index 55453b78ead..8d2a9d0a2da 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -6,7 +6,7 @@ namespace DB static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 32 * 1024 * 1024; /// 32Mi static constexpr int FILECACHE_DEFAULT_FILE_SEGMENT_ALIGNMENT = 4 * 1024 * 1024; /// 4Mi -static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS = 5; +static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS = 0; static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_QUEUE_SIZE_LIMIT = 5000; static constexpr int FILECACHE_DEFAULT_LOAD_METADATA_THREADS = 16; static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 10000000; diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 61a356fa3c3..838ca0b491e 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -187,13 +187,6 @@ size_t FileSegment::getDownloadedSize() const return downloaded_size; } -void FileSegment::setDownloadedSize(size_t delta) -{ - auto lk = lock(); - downloaded_size += delta; - assert(downloaded_size == std::filesystem::file_size(getPath())); -} - bool FileSegment::isDownloaded() const { auto lk = lock(); @@ -311,6 +304,11 @@ FileSegment::RemoteFileReaderPtr FileSegment::getRemoteFileReader() return remote_file_reader; } +FileSegment::LocalCacheWriterPtr FileSegment::getLocalCacheWriter() +{ + return cache_writer; +} + void FileSegment::resetRemoteFileReader() { auto lk = lock(); @@ -340,33 +338,31 @@ void FileSegment::setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_) remote_file_reader = remote_file_reader_; } -void FileSegment::write(char * from, size_t 
size, size_t offset) +void FileSegment::write(char * from, size_t size, size_t offset_in_file) { ProfileEventTimeIncrement watch(ProfileEvents::FileSegmentWriteMicroseconds); - - if (!size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing zero size is not allowed"); - + auto file_segment_path = getPath(); { - auto lk = lock(); - assertIsDownloaderUnlocked("write", lk); - assertNotDetachedUnlocked(lk); - } + if (!size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing zero size is not allowed"); - const auto file_segment_path = getPath(); + { + auto lk = lock(); + assertIsDownloaderUnlocked("write", lk); + assertNotDetachedUnlocked(lk); + } - { if (download_state != State::DOWNLOADING) throw Exception( ErrorCodes::LOGICAL_ERROR, "Expected DOWNLOADING state, got {}", stateToString(download_state)); const size_t first_non_downloaded_offset = getCurrentWriteOffset(); - if (offset != first_non_downloaded_offset) + if (offset_in_file != first_non_downloaded_offset) throw Exception( ErrorCodes::LOGICAL_ERROR, "Attempt to write {} bytes to offset: {}, but current write offset is {}", - size, offset, first_non_downloaded_offset); + size, offset_in_file, first_non_downloaded_offset); const size_t current_downloaded_size = getDownloadedSize(); chassert(reserved_size >= current_downloaded_size); @@ -396,10 +392,10 @@ void FileSegment::write(char * from, size_t size, size_t offset) #endif if (!cache_writer) - cache_writer = std::make_unique(file_segment_path, /* buf_size */0); + cache_writer = std::make_unique(getPath(), /* buf_size */0); /// Size is equal to offset as offset for write buffer points to data end. - cache_writer->set(from, size, /* offset */size); + cache_writer->set(from, /* size */size, /* offset */size); /// Reset the buffer when finished. SCOPE_EXIT({ cache_writer->set(nullptr, 0); }); /// Flush the buffer. 
@@ -435,7 +431,6 @@ void FileSegment::write(char * from, size_t size, size_t offset) } throw; - } catch (Exception & e) { @@ -445,7 +440,7 @@ void FileSegment::write(char * from, size_t size, size_t offset) throw; } - chassert(getCurrentWriteOffset() == offset + size); + chassert(getCurrentWriteOffset() == offset_in_file + size); } FileSegment::State FileSegment::wait(size_t offset) @@ -828,7 +823,7 @@ bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock & lock) }; const auto file_path = getPath(); - if (segment_kind != FileSegmentKind::Temporary) + { std::lock_guard lk(write_mutex); if (downloaded_size == 0) diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index f28482a1ce4..d6b37b60dc1 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -48,7 +48,7 @@ friend class FileCache; /// Because of reserved_size in tryReserve(). public: using Key = FileCacheKey; using RemoteFileReaderPtr = std::shared_ptr; - using LocalCacheWriterPtr = std::unique_ptr; + using LocalCacheWriterPtr = std::shared_ptr; using Downloader = std::string; using DownloaderId = std::string; using Priority = IFileCachePriority; @@ -204,7 +204,7 @@ public: bool reserve(size_t size_to_reserve, size_t lock_wait_timeout_milliseconds, FileCacheReserveStat * reserve_stat = nullptr); /// Write data into reserved space. - void write(char * from, size_t size, size_t offset); + void write(char * from, size_t size, size_t offset_in_file); // Invariant: if state() != DOWNLOADING and remote file reader is present, the reader's // available() == 0, and getFileOffsetOfBufferEnd() == our getCurrentWriteOffset(). @@ -212,6 +212,7 @@ public: // The reader typically requires its internal_buffer to be assigned from the outside before // calling next(). 
RemoteFileReaderPtr getRemoteFileReader(); + LocalCacheWriterPtr getLocalCacheWriter(); RemoteFileReaderPtr extractRemoteFileReader(); @@ -219,8 +220,6 @@ public: void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_); - void setDownloadedSize(size_t delta); - void setDownloadFailed(); private: diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 5ed4ccdbeca..1d23278a255 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -944,14 +944,7 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( try { const auto path = key_metadata->getFileSegmentPath(*file_segment); - if (file_segment->segment_kind == FileSegmentKind::Temporary) - { - /// FIXME: For temporary file segment the requirement is not as strong because - /// the implementation of "temporary data in cache" creates files in advance. - if (fs::exists(path)) - fs::remove(path); - } - else if (file_segment->downloaded_size == 0) + if (file_segment->downloaded_size == 0) { chassert(!fs::exists(path)); } diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index bad8cb18525..7a3fdf5160e 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -325,7 +325,7 @@ void SLRUFileCachePriority::downgrade(IteratorPtr iterator, const CachePriorityG candidate_it->getEntry()->toString()); } - const size_t entry_size = candidate_it->entry->size; + const size_t entry_size = candidate_it->getEntry()->size; if (!probationary_queue.canFit(entry_size, 1, lock)) { throw Exception(ErrorCodes::LOGICAL_ERROR, @@ -483,7 +483,10 @@ SLRUFileCachePriority::SLRUIterator::SLRUIterator( SLRUFileCachePriority::EntryPtr SLRUFileCachePriority::SLRUIterator::getEntry() const { - return entry; + auto entry_ptr = entry.lock(); + if (!entry_ptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry pointer expired"); + return entry_ptr; 
} size_t SLRUFileCachePriority::SLRUIterator::increasePriority(const CachePriorityGuard::Lock & lock) diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h index ee3cafe322d..2102a0ec558 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.h +++ b/src/Interpreters/Cache/SLRUFileCachePriority.h @@ -125,7 +125,10 @@ private: SLRUFileCachePriority * cache_priority; LRUFileCachePriority::LRUIterator lru_iterator; - const EntryPtr entry; + /// Entry itself is stored by lru_iterator.entry. + /// We have it as a separate field to use entry without requiring any lock + /// (which will be required if we wanted to get entry from lru_iterator.getEntry()). + const std::weak_ptr entry; /// Atomic, /// but needed only in order to do FileSegment::getInfo() without any lock, /// which is done for system tables and logging. diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index a593ebfdab2..dd038948adf 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -33,21 +34,20 @@ namespace } WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegment * file_segment_) - : WriteBufferFromFileDecorator(std::make_unique(file_segment_->getPath())) + : WriteBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0) , file_segment(file_segment_) , reserve_space_lock_wait_timeout_milliseconds(getCacheLockWaitTimeout()) { } WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegmentsHolderPtr segment_holder_) - : WriteBufferFromFileDecorator( - segment_holder_->size() == 1 - ? 
std::make_unique(segment_holder_->front().getPath()) - : throw Exception(ErrorCodes::LOGICAL_ERROR, "WriteBufferToFileSegment can be created only from single segment")) + : WriteBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0) , file_segment(&segment_holder_->front()) , segment_holder(std::move(segment_holder_)) , reserve_space_lock_wait_timeout_milliseconds(getCacheLockWaitTimeout()) { + if (segment_holder->size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "WriteBufferToFileSegment can be created only from single segment"); } /// If it throws an exception, the file segment will be incomplete, so you should not use it in the future. @@ -82,9 +82,6 @@ void WriteBufferToFileSegment::nextImpl() reserve_stat_msg += fmt::format("{} hold {}, can release {}; ", toString(kind), ReadableSize(stat.non_releasable_size), ReadableSize(stat.releasable_size)); - if (std::filesystem::exists(file_segment->getPath())) - std::filesystem::remove(file_segment->getPath()); - throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve {} bytes for {}: {}(segment info: {})", bytes_to_write, file_segment->getKind() == FileSegmentKind::Temporary ? "temporary file" : "the file in cache", @@ -95,17 +92,37 @@ void WriteBufferToFileSegment::nextImpl() try { - SwapHelper swap(*this, *impl); /// Write data to the underlying buffer. - impl->next(); + file_segment->write(working_buffer.begin(), bytes_to_write, written_bytes); + written_bytes += bytes_to_write; } catch (...) 
{ LOG_WARNING(getLogger("WriteBufferToFileSegment"), "Failed to write to the underlying buffer ({})", file_segment->getInfoForLog()); throw; } +} - file_segment->setDownloadedSize(bytes_to_write); +void WriteBufferToFileSegment::finalizeImpl() +{ + next(); + auto cache_writer = file_segment->getLocalCacheWriter(); + if (cache_writer) + { + SwapHelper swap(*this, *cache_writer); + cache_writer->finalize(); + } +} + +void WriteBufferToFileSegment::sync() +{ + next(); + auto cache_writer = file_segment->getLocalCacheWriter(); + if (cache_writer) + { + SwapHelper swap(*this, *cache_writer); + cache_writer->sync(); + } } std::unique_ptr WriteBufferToFileSegment::getReadBufferImpl() @@ -114,7 +131,10 @@ std::unique_ptr WriteBufferToFileSegment::getReadBufferImpl() * because in case destructor called without `getReadBufferImpl` called, data won't be read. */ finalize(); - return std::make_unique(file_segment->getPath()); + if (file_segment->getDownloadedSize() > 0) + return std::make_unique(file_segment->getPath()); + else + return std::make_unique(); } } diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.h b/src/Interpreters/Cache/WriteBufferToFileSegment.h index c4b0491f8c0..4719dd4be89 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.h +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.h @@ -9,7 +9,7 @@ namespace DB class FileSegment; -class WriteBufferToFileSegment : public WriteBufferFromFileDecorator, public IReadableWriteBuffer +class WriteBufferToFileSegment : public WriteBufferFromFileBase, public IReadableWriteBuffer { public: explicit WriteBufferToFileSegment(FileSegment * file_segment_); @@ -17,6 +17,13 @@ public: void nextImpl() override; + std::string getFileName() const override { return file_segment->getPath(); } + + void sync() override; + +protected: + void finalizeImpl() override; + private: std::unique_ptr getReadBufferImpl() override; @@ -29,6 +36,7 @@ private: FileSegmentsHolderPtr segment_holder; const size_t 
reserve_space_lock_wait_timeout_milliseconds; + size_t written_bytes = 0; }; diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 13e6fa87051..91c0c592f28 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -22,7 +22,9 @@ #include #include #include - +#include +#include +#include namespace DB { @@ -505,6 +507,41 @@ void executeQueryWithParallelReplicas( query_plan.addStep(std::move(read_from_remote)); } +void executeQueryWithParallelReplicas( + QueryPlan & query_plan, + const StorageID & storage_id, + QueryProcessingStage::Enum processed_stage, + const QueryTreeNodePtr & query_tree, + const PlannerContextPtr & planner_context, + ContextPtr context, + std::shared_ptr storage_limits) +{ + QueryTreeNodePtr modified_query_tree = query_tree->clone(); + rewriteJoinToGlobalJoin(modified_query_tree, context); + modified_query_tree = buildQueryTreeForShard(planner_context, modified_query_tree); + + auto header + = InterpreterSelectQueryAnalyzer::getSampleBlock(modified_query_tree, context, SelectQueryOptions(processed_stage).analyze()); + auto modified_query_ast = queryNodeToDistributedSelectQuery(modified_query_tree); + + executeQueryWithParallelReplicas(query_plan, storage_id, header, processed_stage, modified_query_ast, context, storage_limits); +} + +void executeQueryWithParallelReplicas( + QueryPlan & query_plan, + const StorageID & storage_id, + QueryProcessingStage::Enum processed_stage, + const ASTPtr & query_ast, + ContextPtr context, + std::shared_ptr storage_limits) +{ + auto modified_query_ast = ClusterProxy::rewriteSelectQuery( + context, query_ast, storage_id.database_name, storage_id.table_name, /*remote_table_function_ptr*/ nullptr); + auto header = InterpreterSelectQuery(modified_query_ast, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + + executeQueryWithParallelReplicas(query_plan, storage_id, header, 
processed_stage, modified_query_ast, context, storage_limits); +} + } } diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 284fea05135..6548edf8939 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -24,6 +24,12 @@ struct StorageID; struct StorageLimits; using StorageLimitsList = std::list; +class IQueryTreeNode; +using QueryTreeNodePtr = std::shared_ptr; + +class PlannerContext; +using PlannerContextPtr = std::shared_ptr; + namespace ClusterProxy { @@ -60,7 +66,6 @@ void executeQuery( AdditionalShardFilterGenerator shard_filter_generator, bool is_remote_function); - void executeQueryWithParallelReplicas( QueryPlan & query_plan, const StorageID & storage_id, @@ -69,6 +74,23 @@ void executeQueryWithParallelReplicas( const ASTPtr & query_ast, ContextPtr context, std::shared_ptr storage_limits); + +void executeQueryWithParallelReplicas( + QueryPlan & query_plan, + const StorageID & storage_id, + QueryProcessingStage::Enum processed_stage, + const ASTPtr & query_ast, + ContextPtr context, + std::shared_ptr storage_limits); + +void executeQueryWithParallelReplicas( + QueryPlan & query_plan, + const StorageID & storage_id, + QueryProcessingStage::Enum processed_stage, + const QueryTreeNodePtr & query_tree, + const PlannerContextPtr & planner_context, + ContextPtr context, + std::shared_ptr storage_limits); } } diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index c797ff27ece..efac648214e 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 5c9ae4716b9..b946c2cb21e 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include 
#include #include @@ -31,7 +32,7 @@ #include #include #include -#include +#include #include #include #include @@ -90,6 +91,7 @@ #include #include #include +#include #include #include #include @@ -280,6 +282,8 @@ struct ContextSharedPart : boost::noncopyable String default_profile_name; /// Default profile name used for default values. String system_profile_name; /// Profile used by system processes String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying + String merge_workload TSA_GUARDED_BY(mutex); /// Workload setting value that is used by all merges + String mutation_workload TSA_GUARDED_BY(mutex); /// Workload setting value that is used by all mutations std::unique_ptr access_control TSA_GUARDED_BY(mutex); mutable OnceFlag resource_manager_initialized; mutable ResourceManagerPtr resource_manager; @@ -364,13 +368,16 @@ struct ContextSharedPart : boost::noncopyable std::atomic_size_t max_view_num_to_warn = 10000lu; std::atomic_size_t max_dictionary_num_to_warn = 1000lu; std::atomic_size_t max_part_num_to_warn = 100000lu; + /// Only for system.server_settings, actually value stored in reloader itself + std::atomic_size_t config_reload_interval_ms = ConfigReloader::DEFAULT_RELOAD_INTERVAL.count(); + String format_schema_path; /// Path to a directory that contains schema files used by input formats. String google_protos_path; /// Path to a directory that contains the proto files for the well-known Protobuf types. 
mutable OnceFlag action_locks_manager_initialized; ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers OnceFlag system_logs_initialized; std::unique_ptr system_logs TSA_GUARDED_BY(mutex); /// Used to log queries and operations on parts - std::optional storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage + std::optional storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage std::vector warnings TSA_GUARDED_BY(mutex); /// Store warning messages about server configuration. /// Background executors for *MergeTree tables @@ -610,6 +617,8 @@ struct ContextSharedPart : boost::noncopyable LOG_TRACE(log, "Shutting down database catalog"); DatabaseCatalog::shutdown(); + NamedCollectionFactory::instance().shutdown(); + delete_async_insert_queue.reset(); SHUTDOWN(log, "merges executor", merge_mutate_executor, wait()); @@ -674,6 +683,9 @@ struct ContextSharedPart : boost::noncopyable } } + LOG_TRACE(log, "Shutting down AccessControl"); + access_control->shutdown(); + { std::lock_guard lock(mutex); @@ -740,12 +752,18 @@ struct ContextSharedPart : boost::noncopyable void initializeTraceCollector(std::shared_ptr trace_log) { - if (!trace_log) - return; + if (!trace_collector.has_value()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "TraceCollector needs to be first created before initialization"); + + trace_collector->initialize(trace_log); + } + + void createTraceCollector() + { if (hasTraceCollector()) return; - trace_collector.emplace(std::move(trace_log)); + trace_collector.emplace(); } void addWarningMessage(const String & message) TSA_REQUIRES(mutex) @@ -824,6 +842,7 @@ ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part) auto res = std::shared_ptr(new Context); res->shared = shared_part; res->query_access_info = std::make_shared(); + res->query_privileges_info = std::make_shared(); return res; } @@ -1416,7 +1435,7 @@ void Context::checkAccess(const AccessFlags & flags, const StorageID & 
table_id, void Context::checkAccess(const AccessRightsElement & element) const { checkAccessImpl(element); } void Context::checkAccess(const AccessRightsElements & elements) const { checkAccessImpl(elements); } -std::shared_ptr Context::getAccess() const +std::shared_ptr Context::getAccess() const { /// A helper function to collect parameters for calculating access rights, called with Context::getLocalSharedLock() acquired. auto get_params = [this]() @@ -1433,14 +1452,14 @@ std::shared_ptr Context::getAccess() const { SharedLockGuard lock(mutex); if (access && !need_recalculate_access) - return access; /// No need to recalculate access rights. + return std::make_shared(access, shared_from_this()); /// No need to recalculate access rights. params.emplace(get_params()); if (access && (access->getParams() == *params)) { need_recalculate_access = false; - return access; /// No need to recalculate access rights. + return std::make_shared(access, shared_from_this()); /// No need to recalculate access rights. 
} } @@ -1460,7 +1479,7 @@ std::shared_ptr Context::getAccess() const } } - return res; + return std::make_shared(res, shared_from_this()); } RowPolicyFilterPtr Context::getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const @@ -1552,11 +1571,36 @@ ResourceManagerPtr Context::getResourceManager() const ClassifierPtr Context::getWorkloadClassifier() const { std::lock_guard lock(mutex); + // NOTE: Workload cannot be changed after query start, and getWorkloadClassifier() should not be called before proper `workload` is set if (!classifier) classifier = getResourceManager()->acquire(getSettingsRef().workload); return classifier; } +String Context::getMergeWorkload() const +{ + SharedLockGuard lock(shared->mutex); + return shared->merge_workload; +} + +void Context::setMergeWorkload(const String & value) +{ + std::lock_guard lock(shared->mutex); + shared->merge_workload = value; +} + +String Context::getMutationWorkload() const +{ + SharedLockGuard lock(shared->mutex); + return shared->mutation_workload; +} + +void Context::setMutationWorkload(const String & value) +{ + std::lock_guard lock(shared->mutex); + shared->mutation_workload = value; +} + Scalars Context::getScalars() const { @@ -1821,6 +1865,15 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String } } +void Context::addQueryPrivilegesInfo(const String & privilege, bool granted) const +{ + std::lock_guard lock(query_privileges_info->mutex); + if (granted) + query_privileges_info->used_privileges.emplace(privilege); + else + query_privileges_info->missing_privileges.emplace(privilege); +} + static bool findIdentifier(const ASTFunction * function) { if (!function || !function->arguments) @@ -1862,7 +1915,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const if (table.get()->isView() && table->as() && table->as()->isParameterizedView()) { auto query = 
table->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); - NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression); + NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression, getQueryContext()); StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); ASTCreateQuery create; @@ -2063,7 +2116,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const } -StoragePtr Context::buildParametrizedViewStorage(const ASTPtr & table_expression, const String & database_name, const String & table_name) +StoragePtr Context::buildParametrizedViewStorage(const String & database_name, const String & table_name, const NameToNameMap & param_values) { if (table_name.empty()) return nullptr; @@ -2076,8 +2129,7 @@ StoragePtr Context::buildParametrizedViewStorage(const ASTPtr & table_expression return nullptr; auto query = original_view->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); - NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression); - StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); + StorageView::replaceQueryParametersIfParametrizedView(query, param_values); ASTCreateQuery create; create.select = query->as(); @@ -2502,6 +2554,21 @@ void Context::makeQueryContext() local_read_query_throttler.reset(); local_write_query_throttler.reset(); backups_query_throttler.reset(); + query_privileges_info = std::make_shared(*query_privileges_info); +} + +void Context::makeQueryContextForMerge(const MergeTreeSettings & merge_tree_settings) +{ + makeQueryContext(); + classifier.reset(); // It is assumed that there are no active queries running using this classifier, otherwise this will lead to crashes + settings.workload = merge_tree_settings.merge_workload.value.empty() ? 
getMergeWorkload() : merge_tree_settings.merge_workload; +} + +void Context::makeQueryContextForMutate(const MergeTreeSettings & merge_tree_settings) +{ + makeQueryContext(); + classifier.reset(); // It is assumed that there are no active queries running using this classifier, otherwise this will lead to crashes + settings.workload = merge_tree_settings.mutation_workload.value.empty() ? getMutationWorkload() : merge_tree_settings.mutation_workload; } void Context::makeSessionContext() @@ -3335,8 +3402,6 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const const auto & config = shared->zookeeper_config ? *shared->zookeeper_config : getConfigRef(); if (!shared->zookeeper) shared->zookeeper = zkutil::ZooKeeper::create(config, zkutil::getZooKeeperConfigName(config), getZooKeeperLog()); - else if (shared->zookeeper->hasReachedDeadline()) - shared->zookeeper->finalize("ZooKeeper session has reached its deadline"); if (shared->zookeeper->expired()) { @@ -3891,6 +3956,11 @@ void Context::initializeSystemLogs() }); } +void Context::createTraceCollector() +{ + shared->createTraceCollector(); +} + void Context::initializeTraceCollector() { shared->initializeTraceCollector(getTraceLog()); @@ -4063,7 +4133,7 @@ std::shared_ptr Context::getFilesystemCacheLog() const return shared->system_logs->filesystem_cache_log; } -std::shared_ptr Context::getS3QueueLog() const +std::shared_ptr Context::getS3QueueLog() const { SharedLockGuard lock(shared->mutex); if (!shared->system_logs) @@ -4072,6 +4142,15 @@ std::shared_ptr Context::getS3QueueLog() const return shared->system_logs->s3_queue_log; } +std::shared_ptr Context::getAzureQueueLog() const +{ + SharedLockGuard lock(shared->mutex); + if (!shared->system_logs) + return {}; + + return shared->system_logs->azure_queue_log; +} + std::shared_ptr Context::getFilesystemReadPrefetchesLog() const { SharedLockGuard lock(shared->mutex); @@ -4103,6 +4182,13 @@ std::shared_ptr Context::getBackupLog() const std::shared_ptr 
Context::getBlobStorageLog() const { + bool enable_blob_storage_log = settings.enable_blob_storage_log; + if (hasQueryContext()) + enable_blob_storage_log = getQueryContext()->getSettingsRef().enable_blob_storage_log; + + if (!enable_blob_storage_log) + return {}; + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) @@ -4275,7 +4361,7 @@ void Context::updateStorageConfiguration(const Poco::Util::AbstractConfiguration { std::lock_guard lock(shared->mutex); if (shared->storage_s3_settings) - shared->storage_s3_settings->loadFromConfig("s3", config, getSettingsRef()); + shared->storage_s3_settings->loadFromConfig(config, /* config_prefix */"s3", getSettingsRef()); } } @@ -4327,14 +4413,14 @@ const DistributedSettings & Context::getDistributedSettings() const return *shared->distributed_settings; } -const StorageS3Settings & Context::getStorageS3Settings() const +const S3SettingsByEndpoint & Context::getStorageS3Settings() const { std::lock_guard lock(shared->mutex); if (!shared->storage_s3_settings) { const auto & config = shared->getConfigRefWithLock(lock); - shared->storage_s3_settings.emplace().loadFromConfig("s3", config, getSettingsRef()); + shared->storage_s3_settings.emplace().loadFromConfig(config, "s3", getSettingsRef()); } return *shared->storage_s3_settings; @@ -4427,6 +4513,16 @@ void Context::checkPartitionCanBeDropped(const String & database, const String & checkCanBeDropped(database, table, partition_size, max_partition_size_to_drop); } +void Context::setConfigReloaderInterval(size_t value_ms) +{ + shared->config_reload_interval_ms.store(value_ms, std::memory_order_relaxed); +} + +size_t Context::getConfigReloaderInterval() const +{ + return shared->config_reload_interval_ms.load(std::memory_order_relaxed); +} + InputFormatPtr Context::getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size, const std::optional & format_settings, std::optional max_parsing_threads) const { return 
FormatFactory::instance().getInput(name, buf, sample, shared_from_this(), max_block_size, format_settings, max_parsing_threads); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 87a7baa0469..f9b91a45978 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -50,6 +50,7 @@ class ASTSelectQuery; struct ContextSharedPart; class ContextAccess; +class ContextAccessWrapper; struct User; using UserPtr = std::shared_ptr; struct SettingsProfilesInfo; @@ -106,7 +107,7 @@ class TransactionsInfoLog; class ProcessorsProfileLog; class FilesystemCacheLog; class FilesystemReadPrefetchesLog; -class S3QueueLog; +class ObjectStorageQueueLog; class AsynchronousInsertLog; class BackupLog; class BlobStorageLog; @@ -117,7 +118,7 @@ struct DistributedSettings; struct InitialAllRangesAnnouncement; struct ParallelReadRequest; struct ParallelReadResponse; -class StorageS3Settings; +class S3SettingsByEndpoint; class IDatabase; class DDLWorker; class ITableFunction; @@ -403,9 +404,31 @@ public: mutable std::mutex mutex; }; + struct QueryPrivilegesInfo + { + QueryPrivilegesInfo() = default; + + QueryPrivilegesInfo(const QueryPrivilegesInfo & rhs) + { + std::lock_guard lock(rhs.mutex); + used_privileges = rhs.used_privileges; + missing_privileges = rhs.missing_privileges; + } + + QueryPrivilegesInfo(QueryPrivilegesInfo && rhs) = delete; + + std::unordered_set used_privileges TSA_GUARDED_BY(mutex); + std::unordered_set missing_privileges TSA_GUARDED_BY(mutex); + + mutable std::mutex mutex; + }; + + using QueryPrivilegesInfoPtr = std::shared_ptr; + protected: /// Needs to be changed while having const context in factories methods mutable QueryFactoriesInfo query_factories_info; + QueryPrivilegesInfoPtr query_privileges_info; /// Query metrics for reading data asynchronously with IAsynchronousReader. 
mutable std::shared_ptr async_read_counters; @@ -612,7 +635,7 @@ public: void checkAccess(const AccessRightsElement & element) const; void checkAccess(const AccessRightsElements & elements) const; - std::shared_ptr getAccess() const; + std::shared_ptr getAccess() const; RowPolicyFilterPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const; @@ -622,6 +645,10 @@ public: /// Resource management related ResourceManagerPtr getResourceManager() const; ClassifierPtr getWorkloadClassifier() const; + String getMergeWorkload() const; + void setMergeWorkload(const String & value); + String getMutationWorkload() const; + void setMutationWorkload(const String & value); /// We have to copy external tables inside executeQuery() to track limits. Therefore, set callback for it. Must set once. void setExternalTablesInitializer(ExternalTablesInitializer && initializer); @@ -737,13 +764,17 @@ public: QueryFactoriesInfo getQueryFactoriesInfo() const; void addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const; + const QueryPrivilegesInfo & getQueryPrivilegesInfo() const { return *getQueryPrivilegesInfoPtr(); } + QueryPrivilegesInfoPtr getQueryPrivilegesInfoPtr() const { return query_privileges_info; } + void addQueryPrivilegesInfo(const String & privilege, bool granted) const; + /// For table functions s3/file/url/hdfs/input we can use structure from /// insertion table depending on select expression. StoragePtr executeTableFunction(const ASTPtr & table_expression, const ASTSelectQuery * select_query_hint = nullptr); /// Overload for the new analyzer. Structure inference is performed in QueryAnalysisPass. 
StoragePtr executeTableFunction(const ASTPtr & table_expression, const TableFunctionPtr & table_function_ptr); - StoragePtr buildParametrizedViewStorage(const ASTPtr & table_expression, const String & database_name, const String & table_name); + StoragePtr buildParametrizedViewStorage(const String & database_name, const String & table_name, const NameToNameMap & param_values); void addViewSource(const StoragePtr & storage); StoragePtr getViewSource() const; @@ -907,6 +938,8 @@ public: void setSessionContext(ContextMutablePtr context_) { session_context = context_; } void makeQueryContext(); + void makeQueryContextForMerge(const MergeTreeSettings & merge_tree_settings); + void makeQueryContextForMutate(const MergeTreeSettings & merge_tree_settings); void makeSessionContext(); void makeGlobalContext(); @@ -1077,6 +1110,8 @@ public: void initializeSystemLogs(); /// Call after initialization before using trace collector. + void createTraceCollector(); + void initializeTraceCollector(); /// Call after unexpected crash happen. 
@@ -1098,7 +1133,8 @@ public: std::shared_ptr getTransactionsInfoLog() const; std::shared_ptr getProcessorsProfileLog() const; std::shared_ptr getFilesystemCacheLog() const; - std::shared_ptr getS3QueueLog() const; + std::shared_ptr getS3QueueLog() const; + std::shared_ptr getAzureQueueLog() const; std::shared_ptr getFilesystemReadPrefetchesLog() const; std::shared_ptr getAsynchronousInsertLog() const; std::shared_ptr getBackupLog() const; @@ -1113,7 +1149,7 @@ public: const MergeTreeSettings & getMergeTreeSettings() const; const MergeTreeSettings & getReplicatedMergeTreeSettings() const; const DistributedSettings & getDistributedSettings() const; - const StorageS3Settings & getStorageS3Settings() const; + const S3SettingsByEndpoint & getStorageS3Settings() const; /// Prevents DROP TABLE if its size is greater than max_size (50GB by default, max_size=0 turn off this check) void setMaxTableSizeToDrop(size_t max_size); @@ -1126,6 +1162,9 @@ public: size_t getMaxPartitionSizeToDrop() const; void checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size) const; void checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size, const size_t & max_partition_size_to_drop) const; + /// Only for system.server_settings, actual value is stored in ConfigReloader + void setConfigReloaderInterval(size_t value_ms); + size_t getConfigReloaderInterval() const; /// Lets you select the compression codec according to the conditions described in the configuration file. 
std::shared_ptr chooseCompressionCodec(size_t part_size, double part_size_ratio) const; diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index a37b4db029a..6c346836ed8 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -568,8 +568,21 @@ void ZooKeeperMetadataTransaction::commit() ClusterPtr tryGetReplicatedDatabaseCluster(const String & cluster_name) { - if (const auto * replicated_db = dynamic_cast(DatabaseCatalog::instance().tryGetDatabase(cluster_name).get())) - return replicated_db->tryGetCluster(); + String name = cluster_name; + bool all_groups = false; + if (name.starts_with(DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX)) + { + name = name.substr(strlen(DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX)); + all_groups = true; + } + + if (const auto * replicated_db = dynamic_cast(DatabaseCatalog::instance().tryGetDatabase(name).get())) + { + if (all_groups) + return replicated_db->tryGetAllGroupsCluster(); + else + return replicated_db->tryGetCluster(); + } return {}; } diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 0f4c8cc26a6..aaec94a4fb0 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -63,6 +63,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int HAVE_DEPENDENT_OBJECTS; extern const int UNFINISHED; + extern const int INFINITE_LOOP; } class DatabaseNameHints : public IHints<> @@ -1473,6 +1474,114 @@ void DatabaseCatalog::checkTableCanBeRemovedOrRenamedUnlocked( removing_table, fmt::join(from_other_databases, ", ")); } +void DatabaseCatalog::checkTableCanBeAddedWithNoCyclicDependencies( + const QualifiedTableName & table_name, + const TableNamesSet & new_referential_dependencies, + const TableNamesSet & new_loading_dependencies) +{ + std::lock_guard lock{databases_mutex}; + + StorageID table_id = StorageID{table_name}; + + auto check = [&](TablesDependencyGraph & dependencies, const 
TableNamesSet & new_dependencies) + { + auto old_dependencies = dependencies.removeDependencies(table_id); + dependencies.addDependencies(table_name, new_dependencies); + auto restore_dependencies = [&]() + { + dependencies.removeDependencies(table_id); + if (!old_dependencies.empty()) + dependencies.addDependencies(table_id, old_dependencies); + }; + + if (dependencies.hasCyclicDependencies()) + { + auto cyclic_dependencies_description = dependencies.describeCyclicDependencies(); + restore_dependencies(); + throw Exception( + ErrorCodes::INFINITE_LOOP, + "Cannot add dependencies for '{}', because it will lead to cyclic dependencies: {}", + table_name.getFullName(), + cyclic_dependencies_description); + } + + restore_dependencies(); + }; + + check(referential_dependencies, new_referential_dependencies); + check(loading_dependencies, new_loading_dependencies); +} + +void DatabaseCatalog::checkTableCanBeRenamedWithNoCyclicDependencies(const StorageID & from_table_id, const StorageID & to_table_id) +{ + std::lock_guard lock{databases_mutex}; + + auto check = [&](TablesDependencyGraph & dependencies) + { + auto old_dependencies = dependencies.removeDependencies(from_table_id); + dependencies.addDependencies(to_table_id, old_dependencies); + auto restore_dependencies = [&]() + { + dependencies.removeDependencies(to_table_id); + dependencies.addDependencies(from_table_id, old_dependencies); + }; + + if (dependencies.hasCyclicDependencies()) + { + auto cyclic_dependencies_description = dependencies.describeCyclicDependencies(); + restore_dependencies(); + throw Exception( + ErrorCodes::INFINITE_LOOP, + "Cannot rename '{}' to '{}', because it will lead to cyclic dependencies: {}", + from_table_id.getFullTableName(), + to_table_id.getFullTableName(), + cyclic_dependencies_description); + } + + restore_dependencies(); + }; + + check(referential_dependencies); + check(loading_dependencies); +} + +void DatabaseCatalog::checkTablesCanBeExchangedWithNoCyclicDependencies(const 
StorageID & table_id_1, const StorageID & table_id_2) +{ + std::lock_guard lock{databases_mutex}; + + auto check = [&](TablesDependencyGraph & dependencies) + { + auto old_dependencies_1 = dependencies.removeDependencies(table_id_1); + auto old_dependencies_2 = dependencies.removeDependencies(table_id_2); + dependencies.addDependencies(table_id_1, old_dependencies_2); + dependencies.addDependencies(table_id_2, old_dependencies_1); + auto restore_dependencies = [&]() + { + dependencies.removeDependencies(table_id_1); + dependencies.removeDependencies(table_id_2); + dependencies.addDependencies(table_id_1, old_dependencies_1); + dependencies.addDependencies(table_id_2, old_dependencies_2); + }; + + if (dependencies.hasCyclicDependencies()) + { + auto cyclic_dependencies_description = dependencies.describeCyclicDependencies(); + restore_dependencies(); + throw Exception( + ErrorCodes::INFINITE_LOOP, + "Cannot exchange '{}' and '{}', because it will lead to cyclic dependencies: {}", + table_id_1.getFullTableName(), + table_id_2.getFullTableName(), + cyclic_dependencies_description); + } + + restore_dependencies(); + }; + + check(referential_dependencies); + check(loading_dependencies); +} + void DatabaseCatalog::cleanupStoreDirectoryTask() { for (const auto & [disk_name, disk] : getContext()->getDisksMap()) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 37125d9900c..17d34e96245 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -129,6 +129,7 @@ public: static constexpr const char * SYSTEM_DATABASE = "system"; static constexpr const char * INFORMATION_SCHEMA = "information_schema"; static constexpr const char * INFORMATION_SCHEMA_UPPERCASE = "INFORMATION_SCHEMA"; + static constexpr const char * DEFAULT_DATABASE = "default"; /// Returns true if a passed name is one of the predefined databases' names. 
static bool isPredefinedDatabase(std::string_view database_name); @@ -244,6 +245,9 @@ public: void checkTableCanBeRemovedOrRenamed(const StorageID & table_id, bool check_referential_dependencies, bool check_loading_dependencies, bool is_drop_database = false) const; + void checkTableCanBeAddedWithNoCyclicDependencies(const QualifiedTableName & table_name, const TableNamesSet & new_referential_dependencies, const TableNamesSet & new_loading_dependencies); + void checkTableCanBeRenamedWithNoCyclicDependencies(const StorageID & from_table_id, const StorageID & to_table_id); + void checkTablesCanBeExchangedWithNoCyclicDependencies(const StorageID & table_id_1, const StorageID & table_id_2); struct TableMarkedAsDropped { diff --git a/src/Interpreters/ErrorLog.cpp b/src/Interpreters/ErrorLog.cpp new file mode 100644 index 00000000000..42616f13e24 --- /dev/null +++ b/src/Interpreters/ErrorLog.cpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +ColumnsDescription ErrorLogElement::getColumnsDescription() +{ + ParserCodec codec_parser; + return ColumnsDescription { + { + "hostname", + std::make_shared(std::make_shared()), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Hostname of the server executing the query." + }, + { + "event_date", + std::make_shared(), + parseQuery(codec_parser, "(Delta(2), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Event date." + }, + { + "event_time", + std::make_shared(), + parseQuery(codec_parser, "(Delta(4), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Event time." + }, + { + "code", + std::make_shared(), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Error code." 
+ }, + { + "error", + std::make_shared(std::make_shared()), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Error name." + }, + { + "value", + std::make_shared(), + parseQuery(codec_parser, "(ZSTD(3))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Number of errors happened in time interval." + }, + { + "remote", + std::make_shared(), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Remote exception (i.e. received during one of the distributed queries)." + } + }; +} + +void ErrorLogElement::appendToBlock(MutableColumns & columns) const +{ + size_t column_idx = 0; + + columns[column_idx++]->insert(getFQDNOrHostName()); + columns[column_idx++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[column_idx++]->insert(event_time); + columns[column_idx++]->insert(code); + columns[column_idx++]->insert(ErrorCodes::getName(code)); + columns[column_idx++]->insert(value); + columns[column_idx++]->insert(remote); +} + +struct ValuePair +{ + UInt64 local = 0; + UInt64 remote = 0; +}; + +void ErrorLog::stepFunction(TimePoint current_time) +{ + /// Static lazy initialization to avoid polluting the header with implementation details + static std::vector previous_values(ErrorCodes::end()); + + auto event_time = std::chrono::system_clock::to_time_t(current_time); + + for (ErrorCodes::ErrorCode code = 0, end = ErrorCodes::end(); code < end; ++code) + { + const auto & error = ErrorCodes::values[code].get(); + if (error.local.count != previous_values.at(code).local) + { + ErrorLogElement local_elem { + .event_time=event_time, + .code=code, + .value=error.local.count - previous_values.at(code).local, + .remote=false + }; + this->add(std::move(local_elem)); + previous_values[code].local = error.local.count; + } + if (error.remote.count != previous_values.at(code).remote) + { + ErrorLogElement remote_elem { 
+ .event_time=event_time, + .code=code, + .value=error.remote.count - previous_values.at(code).remote, + .remote=true + }; + this->add(std::move(remote_elem)); + previous_values[code].remote = error.remote.count; + } + } +} + +} diff --git a/src/Interpreters/ErrorLog.h b/src/Interpreters/ErrorLog.h new file mode 100644 index 00000000000..4afe334d4de --- /dev/null +++ b/src/Interpreters/ErrorLog.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +/** ErrorLog is a log of error values measured at regular time interval. + */ + +struct ErrorLogElement +{ + time_t event_time{}; + ErrorCodes::ErrorCode code{}; + ErrorCodes::Value value{}; + bool remote{}; + + static std::string name() { return "ErrorLog"; } + static ColumnsDescription getColumnsDescription(); + static NamesAndAliases getNamesAndAliases() { return {}; } + void appendToBlock(MutableColumns & columns) const; +}; + + +class ErrorLog : public PeriodicLog +{ + using PeriodicLog::PeriodicLog; + +protected: + void stepFunction(TimePoint current_time) override; +}; + +} diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 04f29f35c3c..d832f568cb8 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -49,8 +49,9 @@ namespace ErrorCodes static std::unordered_set processShortCircuitFunctions(const ActionsDAG & actions_dag, ShortCircuitFunctionEvaluation short_circuit_function_evaluation); -ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_) - : settings(settings_) +ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_, bool project_inputs_) + : project_inputs(project_inputs_) + , settings(settings_) { actions_dag = actions_dag_->clone(); @@ -194,6 +195,10 @@ static void setLazyExecutionInfo( } 
lazy_execution_info.short_circuit_ancestors_info[parent].insert(indexes.begin(), indexes.end()); + /// After checking arguments_with_disabled_lazy_execution, if there is no relation with parent, + /// disable the current node. + if (indexes.empty()) + lazy_execution_info.can_be_lazy_executed = false; } else /// If lazy execution is disabled for one of parents, we should disable it for current node. @@ -291,9 +296,9 @@ static std::unordered_set processShortCircuitFunctions /// Firstly, find all short-circuit functions and get their settings. std::unordered_map short_circuit_nodes; - IFunctionBase::ShortCircuitSettings short_circuit_settings; for (const auto & node : nodes) { + IFunctionBase::ShortCircuitSettings short_circuit_settings; if (node.type == ActionsDAG::ActionType::FUNCTION && node.function_base->isShortCircuit(short_circuit_settings, node.children.size()) && !node.children.empty()) short_circuit_nodes[&node] = short_circuit_settings; } @@ -757,7 +762,7 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run, } } - if (actions_dag->isInputProjected()) + if (project_inputs) { block.clear(); } @@ -862,7 +867,7 @@ std::string ExpressionActions::dumpActions() const for (const auto & output_column : output_columns) ss << output_column.name << " " << output_column.type->getName() << "\n"; - ss << "\nproject input: " << actions_dag->isInputProjected() << "\noutput positions:"; + ss << "\noutput positions:"; for (auto pos : result_positions) ss << " " << pos; ss << "\n"; @@ -926,7 +931,6 @@ JSONBuilder::ItemPtr ExpressionActions::toTree() const map->add("Actions", std::move(actions_array)); map->add("Outputs", std::move(outputs_array)); map->add("Positions", std::move(positions_array)); - map->add("Project Input", actions_dag->isInputProjected()); return map; } @@ -980,7 +984,7 @@ void ExpressionActionsChain::addStep(NameSet non_constant_inputs) if (column.column && isColumnConst(*column.column) && 
non_constant_inputs.contains(column.name)) column.column = nullptr; - steps.push_back(std::make_unique(std::make_shared(columns))); + steps.push_back(std::make_unique(std::make_shared(ActionsDAG(columns), false))); } void ExpressionActionsChain::finalize() @@ -1129,14 +1133,14 @@ void ExpressionActionsChain::JoinStep::finalize(const NameSet & required_output_ std::swap(result_columns, new_result_columns); } -ActionsDAGPtr & ExpressionActionsChain::Step::actions() +ActionsAndProjectInputsFlagPtr & ExpressionActionsChain::Step::actions() { - return typeid_cast(*this).actions_dag; + return typeid_cast(*this).actions_and_flags; } -const ActionsDAGPtr & ExpressionActionsChain::Step::actions() const +const ActionsAndProjectInputsFlagPtr & ExpressionActionsChain::Step::actions() const { - return typeid_cast(*this).actions_dag; + return typeid_cast(*this).actions_and_flags; } } diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index cb467004d29..ddffe022215 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -79,11 +79,13 @@ private: ColumnNumbers result_positions; Block sample_block; + bool project_inputs = false; + ExpressionActionsSettings settings; public: ExpressionActions() = delete; - explicit ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_ = {}); + explicit ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_ = {}, bool project_inputs_ = false); ExpressionActions(const ExpressionActions &) = default; ExpressionActions & operator=(const ExpressionActions &) = default; @@ -173,48 +175,49 @@ struct ExpressionActionsChain : WithContext /// Remove unused result and update required columns virtual void finalize(const NameSet & required_output_) = 0; /// Add projections to expression - virtual void prependProjectInput() const = 0; + virtual void prependProjectInput() = 0; virtual std::string dump() const = 0; /// 
Only for ExpressionActionsStep - ActionsDAGPtr & actions(); - const ActionsDAGPtr & actions() const; + ActionsAndProjectInputsFlagPtr & actions(); + const ActionsAndProjectInputsFlagPtr & actions() const; }; struct ExpressionActionsStep : public Step { - ActionsDAGPtr actions_dag; + ActionsAndProjectInputsFlagPtr actions_and_flags; + bool is_final_projection = false; - explicit ExpressionActionsStep(ActionsDAGPtr actions_dag_, Names required_output_ = Names()) + explicit ExpressionActionsStep(ActionsAndProjectInputsFlagPtr actiactions_and_flags_, Names required_output_ = Names()) : Step(std::move(required_output_)) - , actions_dag(std::move(actions_dag_)) + , actions_and_flags(std::move(actiactions_and_flags_)) { } NamesAndTypesList getRequiredColumns() const override { - return actions_dag->getRequiredColumns(); + return actions_and_flags->dag.getRequiredColumns(); } ColumnsWithTypeAndName getResultColumns() const override { - return actions_dag->getResultColumns(); + return actions_and_flags->dag.getResultColumns(); } void finalize(const NameSet & required_output_) override { - if (!actions_dag->isOutputProjected()) - actions_dag->removeUnusedActions(required_output_); + if (!is_final_projection) + actions_and_flags->dag.removeUnusedActions(required_output_); } - void prependProjectInput() const override + void prependProjectInput() override { - actions_dag->projectInput(); + actions_and_flags->project_input = true; } std::string dump() const override { - return actions_dag->dumpDAG(); + return actions_and_flags->dag.dumpDAG(); } }; @@ -229,7 +232,7 @@ struct ExpressionActionsChain : WithContext NamesAndTypesList getRequiredColumns() const override { return required_columns; } ColumnsWithTypeAndName getResultColumns() const override { return result_columns; } void finalize(const NameSet & required_output_) override; - void prependProjectInput() const override {} /// TODO: remove unused columns before ARRAY JOIN ? 
+ void prependProjectInput() override {} /// TODO: remove unused columns before ARRAY JOIN ? std::string dump() const override { return "ARRAY JOIN"; } }; @@ -245,7 +248,7 @@ struct ExpressionActionsChain : WithContext NamesAndTypesList getRequiredColumns() const override { return required_columns; } ColumnsWithTypeAndName getResultColumns() const override { return result_columns; } void finalize(const NameSet & required_output_) override; - void prependProjectInput() const override {} /// TODO: remove unused columns before JOIN ? + void prependProjectInput() override {} /// TODO: remove unused columns before JOIN ? std::string dump() const override { return "JOIN"; } }; @@ -263,7 +266,7 @@ struct ExpressionActionsChain : WithContext steps.clear(); } - ActionsDAGPtr getLastActions(bool allow_empty = false) + ExpressionActionsStep * getLastExpressionStep(bool allow_empty = false) { if (steps.empty()) { @@ -272,7 +275,15 @@ struct ExpressionActionsChain : WithContext throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty ExpressionActionsChain"); } - return typeid_cast(steps.back().get())->actions_dag; + return typeid_cast(steps.back().get()); + } + + ActionsAndProjectInputsFlagPtr getLastActions(bool allow_empty = false) + { + if (auto * step = getLastExpressionStep(allow_empty)) + return step->actions_and_flags; + + return nullptr; } Step & getLastStep() @@ -286,10 +297,15 @@ struct ExpressionActionsChain : WithContext Step & lastStep(const NamesAndTypesList & columns) { if (steps.empty()) - steps.emplace_back(std::make_unique(std::make_shared(columns))); + return addStep(columns); return *steps.back(); } + Step & addStep(const NamesAndTypesList & columns) + { + return *steps.emplace_back(std::make_unique(std::make_shared(ActionsDAG(columns), false))); + } + std::string dumpChain() const; }; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index d80d5cd5b93..8e9ff9ed46c 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp 
+++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -186,7 +186,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( /// Replaces global subqueries with the generated names of temporary tables that will be sent to remote servers. initGlobalSubqueriesAndExternalTables(do_global, is_explain); - auto temp_actions = std::make_shared(sourceColumns()); + ActionsDAG temp_actions(sourceColumns()); columns_after_array_join = getColumnsAfterArrayJoin(temp_actions, sourceColumns()); columns_after_join = analyzeJoin(temp_actions, columns_after_array_join); /// has_aggregation, aggregation_keys, aggregate_descriptions, aggregated_columns. @@ -199,7 +199,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( analyzeAggregation(temp_actions); } -NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns) +NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAG & actions, const NamesAndTypesList & src_columns) { const auto * select_query = query->as(); if (!select_query) @@ -213,14 +213,14 @@ NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & a getRootActionsNoMakeSet(array_join_expression_list, actions, false); auto array_join = addMultipleArrayJoinAction(actions, is_array_join_left); - auto sample_columns = actions->getResultColumns(); + auto sample_columns = actions.getResultColumns(); array_join->prepare(sample_columns); - actions = std::make_shared(sample_columns); + actions = ActionsDAG(sample_columns); NamesAndTypesList new_columns_after_array_join; NameSet added_columns; - for (auto & column : actions->getResultColumns()) + for (auto & column : actions.getResultColumns()) { if (syntax->array_join_result_to_source.contains(column.name)) { @@ -236,7 +236,7 @@ NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & a return new_columns_after_array_join; } 
-NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns) +NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAG & actions, const NamesAndTypesList & src_columns) { const auto * select_query = query->as(); if (!select_query) @@ -246,9 +246,9 @@ NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const if (join) { getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), actions, false); - auto sample_columns = actions->getNamesAndTypesList(); + auto sample_columns = actions.getNamesAndTypesList(); syntax->analyzed_join->addJoinedColumnsAndCorrectTypes(sample_columns, true); - actions = std::make_shared(sample_columns); + actions = ActionsDAG(sample_columns); } NamesAndTypesList result_columns = src_columns; @@ -256,7 +256,7 @@ NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const return result_columns; } -void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) +void ExpressionAnalyzer::analyzeAggregation(ActionsDAG & temp_actions) { /** Find aggregation keys (aggregation_keys), information about aggregate functions (aggregate_descriptions), * as well as a set of columns obtained after the aggregation, if any, @@ -272,7 +272,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) if (!has_aggregation) { - aggregated_columns = temp_actions->getNamesAndTypesList(); + aggregated_columns = temp_actions.getNamesAndTypesList(); return; } @@ -321,7 +321,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) ssize_t group_size = group_elements_ast.size(); const auto & column_name = group_elements_ast[j]->getColumnName(); - const auto * node = temp_actions->tryFindInOutputs(column_name); + const auto * node = temp_actions.tryFindInOutputs(column_name); if (!node) throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier (in GROUP BY): {}", column_name); @@ -375,7 +375,7 @@ void 
ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) getRootActionsNoMakeSet(group_asts[i], temp_actions, false); const auto & column_name = group_asts[i]->getColumnName(); - const auto * node = temp_actions->tryFindInOutputs(column_name); + const auto * node = temp_actions.tryFindInOutputs(column_name); if (!node) throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier (in GROUP BY): {}", column_name); @@ -434,7 +434,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) has_const_aggregation_keys = select_query->group_by_with_constant_keys; } else - aggregated_columns = temp_actions->getNamesAndTypesList(); + aggregated_columns = temp_actions.getNamesAndTypesList(); for (const auto & desc : aggregate_descriptions) aggregated_columns.emplace_back(desc.column_name, desc.function->getResultType()); @@ -465,7 +465,7 @@ SetPtr ExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_o return storage_set->getSet(); } -void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data( @@ -485,7 +485,7 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ actions = visitor_data.getActions(); } -void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAG & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data( @@ -507,7 +507,7 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP void ExpressionAnalyzer::getRootActionsForHaving( - const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) + const ASTPtr & ast, bool 
no_makeset_for_subqueries, ActionsDAG & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data( @@ -528,7 +528,7 @@ void ExpressionAnalyzer::getRootActionsForHaving( } -void ExpressionAnalyzer::getRootActionsForWindowFunctions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions) +void ExpressionAnalyzer::getRootActionsForWindowFunctions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions) { LogAST log; ActionsVisitor::Data visitor_data( @@ -548,7 +548,7 @@ void ExpressionAnalyzer::getRootActionsForWindowFunctions(const ASTPtr & ast, bo } -void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, AggregateDescriptions & descriptions) +void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAG & actions, AggregateDescriptions & descriptions) { for (const ASTPtr & ast : aggregates()) { @@ -567,7 +567,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr for (size_t i = 0; i < arguments.size(); ++i) { const std::string & name = arguments[i]->getColumnName(); - const auto * dag_node = actions->tryFindInOutputs(name); + const auto * dag_node = actions.tryFindInOutputs(name); if (!dag_node) { throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, @@ -659,7 +659,7 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, 1 /* nulls_direction */)); auto actions_dag = std::make_shared(aggregated_columns); - getRootActions(column_ast, false, actions_dag); + getRootActions(column_ast, false, *actions_dag); desc.partition_by_actions.push_back(std::move(actions_dag)); } } @@ -680,7 +680,7 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, order_by_element.nulls_direction)); auto actions_dag = std::make_shared(aggregated_columns); - getRootActions(column_ast, false, actions_dag); + getRootActions(column_ast, false, *actions_dag); desc.order_by_actions.push_back(std::move(actions_dag)); } } @@ -720,7 +720,7 @@ void 
ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, } } -void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) +void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAG & actions) { auto current_context = getContext(); @@ -737,13 +737,13 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) desc, elem.definition.get()); auto [it, inserted] = window_descriptions.insert( - {desc.window_name, desc}); + {elem.name, std::move(desc)}); if (!inserted) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window '{}' is defined twice in the WINDOW clause", - desc.window_name); + elem.name); } } } @@ -776,7 +776,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) for (size_t i = 0; i < arguments.size(); ++i) { const std::string & name = arguments[i]->getColumnName(); - const auto * node = actions->tryFindInOutputs(name); + const auto * node = actions.tryFindInOutputs(name); if (!node) { @@ -817,13 +817,14 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) { const auto & definition = function_node.window_definition->as< const ASTWindowDefinition &>(); + auto default_window_name = definition.getDefaultWindowName(); WindowDescription desc; - desc.window_name = definition.getDefaultWindowName(); + desc.window_name = default_window_name; makeWindowDescriptionFromAST(*current_context, window_descriptions, desc, &definition); auto [it, inserted] = window_descriptions.insert( - {desc.window_name, desc}); + {default_window_name, desc}); if (!inserted) { @@ -871,7 +872,7 @@ const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() cons } /// "Big" ARRAY JOIN. 
-ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool array_join_is_left) const +ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAG & actions, bool array_join_is_left) const { NameSet result_columns; for (const auto & result_source : syntax->array_join_result_to_source) @@ -879,8 +880,8 @@ ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr /// Assign new names to columns, if needed. if (result_source.first != result_source.second) { - const auto & node = actions->findInOutputs(result_source.second); - actions->getOutputs().push_back(&actions->addAlias(node, result_source.first)); + const auto & node = actions.findInOutputs(result_source.second); + actions.getOutputs().push_back(&actions.addAlias(node, result_source.first)); } /// Make ARRAY JOIN (replace arrays with their insides) for the columns in these new names. @@ -890,7 +891,7 @@ ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr return std::make_shared(result_columns, array_join_is_left, getContext()); } -ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types) +ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, ActionsAndProjectInputsFlagPtr & before_array_join, bool only_types) { const auto * select_query = getSelectQuery(); @@ -900,9 +901,9 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns()); - getRootActions(array_join_expression_list, only_types, step.actions()); + getRootActions(array_join_expression_list, only_types, step.actions()->dag); - auto array_join = addMultipleArrayJoinAction(step.actions(), is_array_join_left); + auto array_join = addMultipleArrayJoinAction(step.actions()->dag, is_array_join_left); before_array_join = 
chain.getLastActions(); chain.steps.push_back(std::make_unique(array_join, step.getResultColumns())); @@ -916,20 +917,23 @@ bool SelectQueryExpressionAnalyzer::appendJoinLeftKeys(ExpressionActionsChain & { ExpressionActionsChain::Step & step = chain.lastStep(columns_after_array_join); - getRootActions(analyzedJoin().leftKeysList(), only_types, step.actions()); + getRootActions(analyzedJoin().leftKeysList(), only_types, step.actions()->dag); return true; } JoinPtr SelectQueryExpressionAnalyzer::appendJoin( ExpressionActionsChain & chain, - ActionsDAGPtr & converting_join_columns) + ActionsAndProjectInputsFlagPtr & converting_join_columns) { const ColumnsWithTypeAndName & left_sample_columns = chain.getLastStep().getResultColumns(); - JoinPtr join = makeJoin(*syntax->ast_join, left_sample_columns, converting_join_columns); + ActionsDAGPtr converting_actions; + JoinPtr join = makeJoin(*syntax->ast_join, left_sample_columns, converting_actions); - if (converting_join_columns) + if (converting_actions) { + converting_join_columns = std::make_shared(); + converting_join_columns->dag = std::move(*converting_actions); chain.steps.push_back(std::make_unique(converting_join_columns)); chain.addStep(); } @@ -1065,7 +1069,7 @@ static std::unique_ptr buildJoinedPlan( rename_dag->getOutputs()[pos] = &alias; } } - rename_dag->projectInput(); + rename_dag->appendInputsForUnusedColumns(joined_plan->getCurrentDataStream().header); auto rename_step = std::make_unique(joined_plan->getCurrentDataStream(), std::move(rename_dag)); rename_step->setStepDescription("Rename joined columns"); joined_plan->addStep(std::move(rename_step)); @@ -1166,45 +1170,45 @@ JoinPtr SelectQueryExpressionAnalyzer::makeJoin( return join; } -ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( +ActionsAndProjectInputsFlagPtr SelectQueryExpressionAnalyzer::appendPrewhere( ExpressionActionsChain & chain, bool only_types) { const auto * select_query = getSelectQuery(); if (!select_query->prewhere()) - 
return nullptr; + return {}; Names first_action_names; if (!chain.steps.empty()) first_action_names = chain.steps.front()->getRequiredColumns().getNames(); auto & step = chain.lastStep(sourceColumns()); - getRootActions(select_query->prewhere(), only_types, step.actions()); + getRootActions(select_query->prewhere(), only_types, step.actions()->dag); String prewhere_column_name = select_query->prewhere()->getColumnName(); step.addRequiredOutput(prewhere_column_name); - const auto & node = step.actions()->findInOutputs(prewhere_column_name); + const auto & node = step.actions()->dag.findInOutputs(prewhere_column_name); auto filter_type = node.result_type; if (!filter_type->canBeUsedInBooleanContext()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", filter_type->getName()); - ActionsDAGPtr prewhere_actions; + ActionsAndProjectInputsFlagPtr prewhere_actions; { /// Remove unused source_columns from prewhere actions. - auto tmp_actions_dag = std::make_shared(sourceColumns()); + ActionsDAG tmp_actions_dag(sourceColumns()); getRootActions(select_query->prewhere(), only_types, tmp_actions_dag); /// Constants cannot be removed since they can be used in other parts of the query. /// And if they are not used anywhere, except PREWHERE, they will be removed on the next step. 
- tmp_actions_dag->removeUnusedActions( + tmp_actions_dag.removeUnusedActions( NameSet{prewhere_column_name}, /* allow_remove_inputs= */ true, /* allow_constant_folding= */ false); - auto required_columns = tmp_actions_dag->getRequiredColumnsNames(); + auto required_columns = tmp_actions_dag.getRequiredColumnsNames(); NameSet required_source_columns(required_columns.begin(), required_columns.end()); required_source_columns.insert(first_action_names.begin(), first_action_names.end()); - auto names = step.actions()->getNames(); + auto names = step.actions()->dag.getNames(); NameSet name_set(names.begin(), names.end()); for (const auto & column : sourceColumns()) @@ -1213,13 +1217,13 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( Names required_output(name_set.begin(), name_set.end()); prewhere_actions = chain.getLastActions(); - prewhere_actions->removeUnusedActions(required_output); + prewhere_actions->dag.removeUnusedActions(required_output); } { - ActionsDAGPtr actions; + auto actions = std::make_shared(); - auto required_columns = prewhere_actions->getRequiredColumns(); + auto required_columns = prewhere_actions->dag.getRequiredColumns(); NameSet prewhere_input_names; for (const auto & col : required_columns) prewhere_input_names.insert(col.name); @@ -1263,11 +1267,11 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( } } - actions = std::make_shared(std::move(required_columns)); + actions->dag = ActionsDAG(required_columns); } else { - ColumnsWithTypeAndName columns = prewhere_actions->getResultColumns(); + ColumnsWithTypeAndName columns = prewhere_actions->dag.getResultColumns(); for (const auto & column : sourceColumns()) { @@ -1278,7 +1282,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( } } - actions = std::make_shared(std::move(columns)); + actions->dag = ActionsDAG(columns); } chain.steps.emplace_back( @@ -1300,12 +1304,12 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, 
ExpressionActionsChain::Step & step = chain.lastStep(columns_after_join); - getRootActions(select_query->where(), only_types, step.actions()); + getRootActions(select_query->where(), only_types, step.actions()->dag); auto where_column_name = select_query->where()->getColumnName(); step.addRequiredOutput(where_column_name); - const auto & node = step.actions()->findInOutputs(where_column_name); + const auto & node = step.actions()->dag.findInOutputs(where_column_name); auto filter_type = node.result_type; if (!filter_type->canBeUsedInBooleanContext()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in WHERE: {}", @@ -1332,7 +1336,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain for (const auto & ast_element : ast->children) { step.addRequiredOutput(ast_element->getColumnName()); - getRootActions(ast_element, only_types, step.actions()); + getRootActions(ast_element, only_types, step.actions()->dag); } } } @@ -1341,7 +1345,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain for (const auto & ast : asts) { step.addRequiredOutput(ast->getColumnName()); - getRootActions(ast, only_types, step.actions()); + getRootActions(ast, only_types, step.actions()->dag); } } @@ -1350,7 +1354,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain for (auto & child : asts) { auto actions_dag = std::make_shared(columns_after_join); - getRootActions(child, only_types, actions_dag); + getRootActions(child, only_types, *actions_dag); group_by_elements_actions.emplace_back( std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); } @@ -1387,7 +1391,7 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression const ASTFunction & node = typeid_cast(*ast); if (node.arguments) for (auto & argument : node.arguments->children) - getRootActions(argument, only_types, 
step.actions()); + getRootActions(argument, only_types, step.actions()->dag); } } @@ -1409,7 +1413,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( // recursively together with (1b) as ASTFunction::window_definition. if (getSelectQuery()->window()) { - getRootActionsNoMakeSet(getSelectQuery()->window(), step.actions()); + getRootActionsNoMakeSet(getSelectQuery()->window(), step.actions()->dag); } for (const auto & [_, w] : window_descriptions) @@ -1420,7 +1424,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( // definitions (1a). // Requiring a constant reference to a shared pointer to non-const AST // doesn't really look sane, but the visitor does indeed require it. - getRootActionsNoMakeSet(f.function_node->clone(), step.actions()); + getRootActionsNoMakeSet(f.function_node->clone(), step.actions()->dag); // (2b) Required function argument columns. for (const auto & a : f.function_node->arguments->children) @@ -1442,17 +1446,17 @@ void SelectQueryExpressionAnalyzer::appendExpressionsAfterWindowFunctions(Expres ExpressionActionsChain::Step & step = chain.lastStep(columns_after_window); for (const auto & expression : syntax->expressions_with_window_function) - getRootActionsForWindowFunctions(expression->clone(), true, step.actions()); + getRootActionsForWindowFunctions(expression->clone(), true, step.actions()->dag); } -void SelectQueryExpressionAnalyzer::appendGroupByModifiers(ActionsDAGPtr & before_aggregation, ExpressionActionsChain & chain, bool /* only_types */) +void SelectQueryExpressionAnalyzer::appendGroupByModifiers(ActionsDAG & before_aggregation, ExpressionActionsChain & chain, bool /* only_types */) { const auto * select_query = getAggregatingQuery(); if (!select_query->groupBy() || !(select_query->group_by_with_rollup || select_query->group_by_with_cube)) return; - auto source_columns = before_aggregation->getResultColumns(); + auto source_columns = before_aggregation.getResultColumns(); 
ColumnsWithTypeAndName result_columns; for (const auto & source_column : source_columns) @@ -1462,9 +1466,11 @@ void SelectQueryExpressionAnalyzer::appendGroupByModifiers(ActionsDAGPtr & befor else result_columns.push_back(source_column); } - ExpressionActionsChain::Step & step = chain.lastStep(before_aggregation->getNamesAndTypesList()); + auto required_output = chain.getLastStep().required_output; + ExpressionActionsChain::Step & step = chain.addStep(before_aggregation.getNamesAndTypesList()); + step.required_output = std::move(required_output); - step.actions() = ActionsDAG::makeConvertingActions(source_columns, result_columns, ActionsDAG::MatchColumnsMode::Position); + step.actions()->dag = std::move(*ActionsDAG::makeConvertingActions(source_columns, result_columns, ActionsDAG::MatchColumnsMode::Position)); } void SelectQueryExpressionAnalyzer::appendSelectSkipWindowExpressions(ExpressionActionsChain::Step & step, ASTPtr const & node) @@ -1495,7 +1501,7 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); - getRootActionsForHaving(select_query->having(), only_types, step.actions()); + getRootActionsForHaving(select_query->having(), only_types, step.actions()->dag); step.addRequiredOutput(select_query->having()->getColumnName()); @@ -1508,13 +1514,13 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); - getRootActions(select_query->select(), only_types, step.actions()); + getRootActions(select_query->select(), only_types, step.actions()->dag); for (const auto & child : select_query->select()->children) appendSelectSkipWindowExpressions(step, child); } -ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, +ActionsAndProjectInputsFlagPtr 
SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions & order_by_elements_actions) { const auto * select_query = getSelectQuery(); @@ -1538,7 +1544,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai replaceForPositionalArguments(ast->children.at(0), select_query, ASTSelectQuery::Expression::ORDER_BY); } - getRootActions(select_query->orderBy(), only_types, step.actions()); + getRootActions(select_query->orderBy(), only_types, step.actions()->dag); bool with_fill = false; @@ -1601,7 +1607,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai for (const auto & child : select_query->orderBy()->children) { auto actions_dag = std::make_shared(columns_after_join); - getRootActions(child, only_types, actions_dag); + getRootActions(child, only_types, *actions_dag); order_by_elements_actions.emplace_back( std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); } @@ -1628,7 +1634,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); - getRootActions(select_query->limitBy(), only_types, step.actions()); + getRootActions(select_query->limitBy(), only_types, step.actions()->dag); NameSet existing_column_names; for (const auto & column : aggregated_columns) @@ -1657,7 +1663,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain return true; } -ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const +ActionsAndProjectInputsFlagPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const { const auto * select_query = getSelectQuery(); @@ -1705,17 +1711,20 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio } } - auto actions = 
chain.getLastActions(); - actions->project(result_columns); + auto * last_step = chain.getLastExpressionStep(); + auto & actions = last_step->actions_and_flags; + actions->dag.project(result_columns); if (!required_result_columns.empty()) { result_columns.clear(); for (const auto & column : required_result_columns) result_columns.emplace_back(column, std::string{}); - actions->project(result_columns); + actions->dag.project(result_columns); } + actions->project_input = true; + last_step->is_final_projection = true; return actions; } @@ -1723,14 +1732,13 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types) { ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns()); - getRootActions(expr, only_types, step.actions()); + getRootActions(expr, only_types, step.actions()->dag); step.addRequiredOutput(expr->getColumnName()); } - -ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_result) +ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool remove_unused_result) { - auto actions_dag = std::make_shared(aggregated_columns); + ActionsDAG actions_dag(aggregated_columns); NamesWithAliases result_columns; Names result_names; @@ -1756,13 +1764,15 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r if (add_aliases) { - if (project_result) - actions_dag->project(result_columns); + if (remove_unused_result) + { + actions_dag.project(result_columns); + } else - actions_dag->addAliases(result_columns); + actions_dag.addAliases(result_columns); } - if (!(add_aliases && project_result)) + if (!(add_aliases && remove_unused_result)) { NameSet name_set(result_names.begin(), result_names.end()); /// We will not delete the original columns. 
@@ -1775,22 +1785,22 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r } } - actions_dag->removeUnusedActions(name_set); + actions_dag.removeUnusedActions(name_set); } - return actions_dag; + return std::make_unique(std::move(actions_dag)); } -ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result, CompileExpressions compile_expressions) +ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool remove_unused_result, CompileExpressions compile_expressions) { return std::make_shared( - getActionsDAG(add_aliases, project_result), ExpressionActionsSettings::fromContext(getContext(), compile_expressions)); + getActionsDAG(add_aliases, remove_unused_result), ExpressionActionsSettings::fromContext(getContext(), compile_expressions), add_aliases && remove_unused_result); } ActionsDAGPtr ExpressionAnalyzer::getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs) { auto actions = std::make_shared(constant_inputs); - getRootActions(query, true /* no_makeset_for_subqueries */, actions, true /* only_consts */); + getRootActions(query, true /* no_makeset_for_subqueries */, *actions, true /* only_consts */); return actions; } @@ -1805,7 +1815,7 @@ std::unique_ptr SelectQueryExpressionAnalyzer::getJoinedPlan() return std::move(joined_plan); } -ActionsDAGPtr SelectQueryExpressionAnalyzer::simpleSelectActions() +ActionsAndProjectInputsFlagPtr SelectQueryExpressionAnalyzer::simpleSelectActions() { ExpressionActionsChain new_chain(getContext()); appendSelect(new_chain, false); @@ -1845,14 +1855,16 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( ssize_t where_step_num = -1; ssize_t having_step_num = -1; + ActionsAndProjectInputsFlagPtr prewhere_dag_and_flags; + auto finalize_chain = [&](ExpressionActionsChain & chain) -> ColumnsWithTypeAndName { if (prewhere_step_num >= 0) { ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); - auto required_columns_ = 
prewhere_info->prewhere_actions->getRequiredColumnsNames(); - NameSet required_source_columns(required_columns_.begin(), required_columns_.end()); + auto prewhere_required_columns = prewhere_dag_and_flags->dag.getRequiredColumnsNames(); + NameSet required_source_columns(prewhere_required_columns.begin(), prewhere_required_columns.end()); /// Add required columns to required output in order not to remove them after prewhere execution. /// TODO: add sampling and final execution to common chain. for (const auto & column : additional_required_columns_after_prewhere) @@ -1864,6 +1876,13 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( chain.finalize(); + if (prewhere_dag_and_flags) + { + auto dag = std::make_shared(std::move(prewhere_dag_and_flags->dag)); + prewhere_info = std::make_shared(std::move(dag), query.prewhere()->getColumnName()); + prewhere_dag_and_flags.reset(); + } + finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); auto res = chain.getLastStep().getResultColumns(); @@ -1914,19 +1933,19 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( filter_info->do_remove_column = true; } - if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage)) + if (prewhere_dag_and_flags = query_analyzer.appendPrewhere(chain, !first_stage); prewhere_dag_and_flags) { /// Prewhere is always the first one. 
prewhere_step_num = 0; - prewhere_info = std::make_shared(actions, query.prewhere()->getColumnName()); - if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings)) + if (allowEarlyConstantFolding(prewhere_dag_and_flags->dag, settings)) { Block before_prewhere_sample = source_header; if (sanitizeBlock(before_prewhere_sample)) { + auto dag = prewhere_dag_and_flags->dag.clone(); ExpressionActions( - prewhere_info->prewhere_actions, + dag, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_prewhere_sample); auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName()); /// If the filter column is a constant, record it. @@ -1950,7 +1969,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( { where_step_num = chain.steps.size() - 1; before_where = chain.getLastActions(); - if (allowEarlyConstantFolding(*before_where, settings)) + if (allowEarlyConstantFolding(before_where->dag, settings)) { Block before_where_sample; if (chain.steps.size() > 1) @@ -1960,7 +1979,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (sanitizeBlock(before_where_sample)) { ExpressionActions( - before_where, + before_where->dag.clone(), ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); auto & column_elem @@ -1986,7 +2005,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_aggregation = chain.getLastActions(); if (settings.group_by_use_nulls) - query_analyzer.appendGroupByModifiers(before_aggregation, chain, only_types); + query_analyzer.appendGroupByModifiers(before_aggregation->dag, chain, only_types); auto columns_before_aggregation = finalize_chain(chain); @@ -2033,8 +2052,8 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( true); auto & step = chain.lastStep(query_analyzer.aggregated_columns); - auto & actions = step.actions(); - actions = ActionsDAG::merge(std::move(*actions), std::move(*converting)); + auto & actions = step.actions()->dag; 
+ actions = std::move(*ActionsDAG::merge(std::move(actions), std::move(*converting))); } } @@ -2070,13 +2089,13 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( // the main SELECT, similar to what we do for aggregate functions. if (has_window) { - query_analyzer.makeWindowDescriptions(chain.getLastActions()); + query_analyzer.makeWindowDescriptions(chain.getLastActions()->dag); query_analyzer.appendWindowFunctionsArguments(chain, only_types || !first_stage); // Build a list of output columns of the window step. // 1) We need the columns that are the output of ExpressionActions. - for (const auto & x : chain.getLastActions()->getNamesAndTypesList()) + for (const auto & x : chain.getLastActions()->dag.getNamesAndTypesList()) { query_analyzer.columns_after_window.push_back(x); } @@ -2113,7 +2132,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( finalize_chain(chain); query_analyzer.appendExpressionsAfterWindowFunctions(chain, only_types || !first_stage); - for (const auto & x : chain.getLastActions()->getNamesAndTypesList()) + for (const auto & x : chain.getLastActions()->dag.getNamesAndTypesList()) { query_analyzer.columns_after_window.push_back(x); } @@ -2173,7 +2192,6 @@ void ExpressionAnalysisResult::finalize( if (prewhere_step_num >= 0) { const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); - prewhere_info->prewhere_actions->projectInput(false); NameSet columns_to_remove; for (const auto & [name, can_remove] : step.required_output) @@ -2206,9 +2224,9 @@ void ExpressionAnalysisResult::finalize( void ExpressionAnalysisResult::removeExtraColumns() const { if (hasWhere()) - before_where->projectInput(); + before_where->project_input = true; if (hasHaving()) - before_having->projectInput(); + before_having->project_input = true; } void ExpressionAnalysisResult::checkActions() const @@ -2238,7 +2256,7 @@ std::string ExpressionAnalysisResult::dump() const if (before_array_join) { - ss << "before_array_join " << 
before_array_join->dumpDAG() << "\n"; + ss << "before_array_join " << before_array_join->dag.dumpDAG() << "\n"; } if (array_join) @@ -2248,12 +2266,12 @@ std::string ExpressionAnalysisResult::dump() const if (before_join) { - ss << "before_join " << before_join->dumpDAG() << "\n"; + ss << "before_join " << before_join->dag.dumpDAG() << "\n"; } if (before_where) { - ss << "before_where " << before_where->dumpDAG() << "\n"; + ss << "before_where " << before_where->dag.dumpDAG() << "\n"; } if (prewhere_info) @@ -2268,32 +2286,32 @@ std::string ExpressionAnalysisResult::dump() const if (before_aggregation) { - ss << "before_aggregation " << before_aggregation->dumpDAG() << "\n"; + ss << "before_aggregation " << before_aggregation->dag.dumpDAG() << "\n"; } if (before_having) { - ss << "before_having " << before_having->dumpDAG() << "\n"; + ss << "before_having " << before_having->dag.dumpDAG() << "\n"; } if (before_window) { - ss << "before_window " << before_window->dumpDAG() << "\n"; + ss << "before_window " << before_window->dag.dumpDAG() << "\n"; } if (before_order_by) { - ss << "before_order_by " << before_order_by->dumpDAG() << "\n"; + ss << "before_order_by " << before_order_by->dag.dumpDAG() << "\n"; } if (before_limit_by) { - ss << "before_limit_by " << before_limit_by->dumpDAG() << "\n"; + ss << "before_limit_by " << before_limit_by->dag.dumpDAG() << "\n"; } if (final_projection) { - ss << "final_projection " << final_projection->dumpDAG() << "\n"; + ss << "final_projection " << final_projection->dag.dumpDAG() << "\n"; } if (!selected_columns.empty()) diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 941194e69ff..12d6dce8f72 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -115,10 +115,10 @@ public: /// If `ast` is not a SELECT query, just gets all the actions to evaluate the expression. 
/// If add_aliases, only the calculated values in the desired order and add aliases. - /// If also project_result, than only aliases remain in the output block. + /// If also remove_unused_result, than only aliases remain in the output block. /// Otherwise, only temporary columns will be deleted from the block. - ActionsDAGPtr getActionsDAG(bool add_aliases, bool project_result = true); - ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true, CompileExpressions compile_expressions = CompileExpressions::no); + ActionsDAGPtr getActionsDAG(bool add_aliases, bool remove_unused_result = true); + ExpressionActionsPtr getActions(bool add_aliases, bool remove_unused_result = true, CompileExpressions compile_expressions = CompileExpressions::no); /// Get actions to evaluate a constant expression. The function adds constants and applies functions that depend only on constants. /// Does not execute subqueries. @@ -139,7 +139,7 @@ public: const WindowDescriptions & windowDescriptions() const { return window_descriptions; } void makeWindowDescriptionFromAST(const Context & context, const WindowDescriptions & existing_descriptions, WindowDescription & desc, const IAST * ast); - void makeWindowDescriptions(ActionsDAGPtr actions); + void makeWindowDescriptions(ActionsDAG & actions); /** Checks if subquery is not a plain StorageSet. * Because while making set we will read data from StorageSet which is not allowed. @@ -172,34 +172,34 @@ protected: /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables. 
void initGlobalSubqueriesAndExternalTables(bool do_global, bool is_explain); - ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const; + ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAG & actions, bool is_left) const; - void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions, bool only_consts = false); /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the * prepared sets would not be applicable for MergeTree index optimization. */ - void getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAG & actions, bool only_consts = false); - void getRootActionsForHaving(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActionsForHaving(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions, bool only_consts = false); - void getRootActionsForWindowFunctions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions); + void getRootActionsForWindowFunctions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions); /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions, * Create a set of columns aggregated_columns resulting after the aggregation, if any, * or after all the actions that are normally performed before aggregation. * Set has_aggregation = true if there is GROUP BY or at least one aggregate function. 
*/ - void analyzeAggregation(ActionsDAGPtr & temp_actions); - void makeAggregateDescriptions(ActionsDAGPtr & actions, AggregateDescriptions & descriptions); + void analyzeAggregation(ActionsDAG & temp_actions); + void makeAggregateDescriptions(ActionsDAG & actions, AggregateDescriptions & descriptions); const ASTSelectQuery * getSelectQuery() const; bool isRemoteStorage() const; - NamesAndTypesList getColumnsAfterArrayJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns); - NamesAndTypesList analyzeJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns); + NamesAndTypesList getColumnsAfterArrayJoin(ActionsDAG & actions, const NamesAndTypesList & src_columns); + NamesAndTypesList analyzeJoin(ActionsDAG & actions, const NamesAndTypesList & src_columns); AggregationKeysInfo getAggregationKeysInfo() const noexcept { @@ -231,20 +231,20 @@ struct ExpressionAnalysisResult bool use_grouping_set_key = false; - ActionsDAGPtr before_array_join; + ActionsAndProjectInputsFlagPtr before_array_join; ArrayJoinActionPtr array_join; - ActionsDAGPtr before_join; - ActionsDAGPtr converting_join_columns; + ActionsAndProjectInputsFlagPtr before_join; + ActionsAndProjectInputsFlagPtr converting_join_columns; JoinPtr join; - ActionsDAGPtr before_where; - ActionsDAGPtr before_aggregation; - ActionsDAGPtr before_having; + ActionsAndProjectInputsFlagPtr before_where; + ActionsAndProjectInputsFlagPtr before_aggregation; + ActionsAndProjectInputsFlagPtr before_having; String having_column_name; bool remove_having_filter = false; - ActionsDAGPtr before_window; - ActionsDAGPtr before_order_by; - ActionsDAGPtr before_limit_by; - ActionsDAGPtr final_projection; + ActionsAndProjectInputsFlagPtr before_window; + ActionsAndProjectInputsFlagPtr before_order_by; + ActionsAndProjectInputsFlagPtr before_limit_by; + ActionsAndProjectInputsFlagPtr final_projection; /// Columns from the SELECT list, before renaming them to aliases. Used to /// perform SELECT DISTINCT. 
@@ -351,12 +351,12 @@ public: /// Tables that will need to be sent to remote servers for distributed query processing. const TemporaryTablesMapping & getExternalTables() const { return external_tables; } - ActionsDAGPtr simpleSelectActions(); + ActionsAndProjectInputsFlagPtr simpleSelectActions(); /// These appends are public only for tests void appendSelect(ExpressionActionsChain & chain, bool only_types); /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases. - ActionsDAGPtr appendProjectResult(ExpressionActionsChain & chain) const; + ActionsAndProjectInputsFlagPtr appendProjectResult(ExpressionActionsChain & chain) const; private: StorageMetadataPtr metadata_snapshot; @@ -386,13 +386,13 @@ private: */ /// Before aggregation: - ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types); + ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsAndProjectInputsFlagPtr & before_array_join, bool only_types); bool appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types); - JoinPtr appendJoin(ExpressionActionsChain & chain, ActionsDAGPtr & converting_join_columns); + JoinPtr appendJoin(ExpressionActionsChain & chain, ActionsAndProjectInputsFlagPtr & converting_join_columns); /// remove_filter is set in ExpressionActionsChain::finalize(); /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). 
- ActionsDAGPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types); + ActionsAndProjectInputsFlagPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types); bool appendWhere(ExpressionActionsChain & chain, bool only_types); bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &); void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); @@ -401,12 +401,12 @@ private: void appendExpressionsAfterWindowFunctions(ExpressionActionsChain & chain, bool only_types); void appendSelectSkipWindowExpressions(ExpressionActionsChain::Step & step, ASTPtr const & node); - void appendGroupByModifiers(ActionsDAGPtr & before_aggregation, ExpressionActionsChain & chain, bool only_types); + void appendGroupByModifiers(ActionsDAG & before_aggregation, ExpressionActionsChain & chain, bool only_types); /// After aggregation: bool appendHaving(ExpressionActionsChain & chain, bool only_types); /// appendSelect - ActionsDAGPtr appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions &); + ActionsAndProjectInputsFlagPtr appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions &); bool appendLimitBy(ExpressionActionsChain & chain, bool only_types); /// appendProjectResult }; diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 4dd2f89b90a..6970048269b 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp deleted file mode 100644 index 75da8bbc3e7..00000000000 --- a/src/Interpreters/HashJoin.cpp +++ /dev/null @@ -1,2871 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include 
-#include -#include -#include -#include -#include - - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include "Core/Joins.h" -#include "Interpreters/TemporaryDataOnDisk.h" - -#include -#include - -namespace CurrentMetrics -{ - extern const Metric TemporaryFilesForJoin; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; - extern const int NO_SUCH_COLUMN_IN_TABLE; - extern const int INCOMPATIBLE_TYPE_OF_JOIN; - extern const int UNSUPPORTED_JOIN_KEYS; - extern const int LOGICAL_ERROR; - extern const int SYNTAX_ERROR; - extern const int SET_SIZE_LIMIT_EXCEEDED; - extern const int TYPE_MISMATCH; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int INVALID_JOIN_ON_EXPRESSION; -} - -namespace -{ - -struct NotProcessedCrossJoin : public ExtraBlock -{ - size_t left_position; - size_t right_block; - std::unique_ptr reader; -}; - - -Int64 getCurrentQueryMemoryUsage() -{ - /// Use query-level memory tracker - if (auto * memory_tracker_child = CurrentThread::getMemoryTracker()) - if (auto * memory_tracker = memory_tracker_child->getParent()) - return memory_tracker->get(); - return 0; -} - -} - -namespace JoinStuff -{ - /// for single disjunct - bool JoinUsedFlags::getUsedSafe(size_t i) const - { - return getUsedSafe(nullptr, i); - } - - /// for multiple disjuncts - bool JoinUsedFlags::getUsedSafe(const Block * block_ptr, size_t row_idx) const - { - if (auto it = flags.find(block_ptr); it != flags.end()) - return it->second[row_idx].load(); - return !need_flags; - } - - /// for single disjunct - template - void JoinUsedFlags::reinit(size_t size) - { - if constexpr (MapGetter::flagged) - { - assert(flags[nullptr].size() <= size); - need_flags = true; - // For one disjunct clause case, we don't need to reinit each time we call addBlockToJoin. 
- // and there is no value inserted in this JoinUsedFlags before addBlockToJoin finish. - // So we reinit only when the hash table is rehashed to a larger size. - if (flags.empty() || flags[nullptr].size() < size) [[unlikely]] - { - flags[nullptr] = std::vector(size); - } - } - } - - /// for multiple disjuncts - template - void JoinUsedFlags::reinit(const Block * block_ptr) - { - if constexpr (MapGetter::flagged) - { - assert(flags[block_ptr].size() <= block_ptr->rows()); - need_flags = true; - flags[block_ptr] = std::vector(block_ptr->rows()); - } - } - - template - void JoinUsedFlags::setUsed(const FindResult & f) - { - if constexpr (!use_flags) - return; - - /// Could be set simultaneously from different threads. - if constexpr (flag_per_row) - { - auto & mapped = f.getMapped(); - flags[mapped.block][mapped.row_num].store(true, std::memory_order_relaxed); - } - else - { - flags[nullptr][f.getOffset()].store(true, std::memory_order_relaxed); - } - } - - template - void JoinUsedFlags::setUsed(const Block * block, size_t row_num, size_t offset) - { - if constexpr (!use_flags) - return; - - /// Could be set simultaneously from different threads. 
- if constexpr (flag_per_row) - { - flags[block][row_num].store(true, std::memory_order_relaxed); - } - else - { - flags[nullptr][offset].store(true, std::memory_order_relaxed); - } - } - - template - bool JoinUsedFlags::getUsed(const FindResult & f) - { - if constexpr (!use_flags) - return true; - - if constexpr (flag_per_row) - { - auto & mapped = f.getMapped(); - return flags[mapped.block][mapped.row_num].load(); - } - else - { - return flags[nullptr][f.getOffset()].load(); - } - } - - template - bool JoinUsedFlags::setUsedOnce(const FindResult & f) - { - if constexpr (!use_flags) - return true; - - if constexpr (flag_per_row) - { - auto & mapped = f.getMapped(); - - /// fast check to prevent heavy CAS with seq_cst order - if (flags[mapped.block][mapped.row_num].load(std::memory_order_relaxed)) - return false; - - bool expected = false; - return flags[mapped.block][mapped.row_num].compare_exchange_strong(expected, true); - } - else - { - auto off = f.getOffset(); - - /// fast check to prevent heavy CAS with seq_cst order - if (flags[nullptr][off].load(std::memory_order_relaxed)) - return false; - - bool expected = false; - return flags[nullptr][off].compare_exchange_strong(expected, true); - } - } -} - -static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable) -{ - if (nullable) - { - JoinCommon::convertColumnToNullable(column); - } - else - { - /// We have to replace values masked by NULLs with defaults. 
- if (column.column) - if (const auto * nullable_column = checkAndGetColumn(&*column.column)) - column.column = JoinCommon::filterWithBlanks(column.column, nullable_column->getNullMapColumn().getData(), true); - - JoinCommon::removeColumnNullability(column); - } -} - -static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map) -{ - if (nullable) - { - JoinCommon::convertColumnToNullable(column); - if (column.type->isNullable() && !negative_null_map.empty()) - { - MutableColumnPtr mutable_column = IColumn::mutate(std::move(column.column)); - assert_cast(*mutable_column).applyNegatedNullMap(negative_null_map); - column.column = std::move(mutable_column); - } - } - else - JoinCommon::removeColumnNullability(column); -} - -HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_sample_block_, - bool any_take_last_row_, size_t reserve_num_, const String & instance_id_) - : table_join(table_join_) - , kind(table_join->kind()) - , strictness(table_join->strictness()) - , any_take_last_row(any_take_last_row_) - , reserve_num(reserve_num_) - , instance_id(instance_id_) - , asof_inequality(table_join->getAsofInequality()) - , data(std::make_shared()) - , tmp_data( - table_join_->getTempDataOnDisk() - ? std::make_unique(table_join_->getTempDataOnDisk(), CurrentMetrics::TemporaryFilesForJoin) - : nullptr) - , right_sample_block(right_sample_block_) - , max_joined_block_rows(table_join->maxJoinedBlockRows()) - , instance_log_id(!instance_id_.empty() ? 
"(" + instance_id_ + ") " : "") - , log(getLogger("HashJoin")) -{ - LOG_TRACE(log, "{}Keys: {}, datatype: {}, kind: {}, strictness: {}, right header: {}", - instance_log_id, TableJoin::formatClauses(table_join->getClauses(), true), data->type, kind, strictness, right_sample_block.dumpStructure()); - - validateAdditionalFilterExpression(table_join->getMixedJoinExpression()); - - if (isCrossOrComma(kind)) - { - data->type = Type::CROSS; - sample_block_with_columns_to_add = right_sample_block; - } - else if (table_join->getClauses().empty()) - { - data->type = Type::EMPTY; - /// We might need to insert default values into the right columns, materialize them - sample_block_with_columns_to_add = materializeBlock(right_sample_block); - } - else if (table_join->oneDisjunct()) - { - const auto & key_names_right = table_join->getOnlyClause().key_names_right; - JoinCommon::splitAdditionalColumns(key_names_right, right_sample_block, right_table_keys, sample_block_with_columns_to_add); - required_right_keys = table_join->getRequiredRightKeys(right_table_keys, required_right_keys_sources); - } - else - { - /// required right keys concept does not work well if multiple disjuncts, we need all keys - sample_block_with_columns_to_add = right_table_keys = materializeBlock(right_sample_block); - } - - materializeBlockInplace(right_table_keys); - initRightBlockStructure(data->sample_block); - data->sample_block = prepareRightBlock(data->sample_block); - - JoinCommon::createMissedColumns(sample_block_with_columns_to_add); - - size_t disjuncts_num = table_join->getClauses().size(); - data->maps.resize(disjuncts_num); - key_sizes.reserve(disjuncts_num); - - for (const auto & clause : table_join->getClauses()) - { - const auto & key_names_right = clause.key_names_right; - ColumnRawPtrs key_columns = JoinCommon::extractKeysForJoin(right_table_keys, key_names_right); - - if (strictness == JoinStrictness::Asof) - { - assert(disjuncts_num == 1); - - /// @note ASOF JOIN is not INNER. 
It's better avoid use of 'INNER ASOF' combination in messages. - /// In fact INNER means 'LEFT SEMI ASOF' while LEFT means 'LEFT OUTER ASOF'. - if (!isLeft(kind) && !isInner(kind)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Wrong ASOF JOIN type. Only ASOF and LEFT ASOF joins are supported"); - - if (key_columns.size() <= 1) - throw Exception(ErrorCodes::SYNTAX_ERROR, "ASOF join needs at least one equi-join column"); - - size_t asof_size; - asof_type = SortedLookupVectorBase::getTypeSize(*key_columns.back(), asof_size); - key_columns.pop_back(); - - /// this is going to set up the appropriate hash table for the direct lookup part of the join - /// However, this does not depend on the size of the asof join key (as that goes into the BST) - /// Therefore, add it back in such that it can be extracted appropriately from the full stored - /// key_columns and key_sizes - auto & asof_key_sizes = key_sizes.emplace_back(); - data->type = chooseMethod(kind, key_columns, asof_key_sizes); - asof_key_sizes.push_back(asof_size); - } - else - { - /// Choose data structure to use for JOIN. 
- auto current_join_method = chooseMethod(kind, key_columns, key_sizes.emplace_back()); - if (data->type == Type::EMPTY) - data->type = current_join_method; - else if (data->type != current_join_method) - data->type = Type::hashed; - } - } - - for (auto & maps : data->maps) - dataMapInit(maps); -} - -HashJoin::Type HashJoin::chooseMethod(JoinKind kind, const ColumnRawPtrs & key_columns, Sizes & key_sizes) -{ - size_t keys_size = key_columns.size(); - - if (keys_size == 0) - { - if (isCrossOrComma(kind)) - return Type::CROSS; - return Type::EMPTY; - } - - bool all_fixed = true; - size_t keys_bytes = 0; - key_sizes.resize(keys_size); - for (size_t j = 0; j < keys_size; ++j) - { - if (!key_columns[j]->isFixedAndContiguous()) - { - all_fixed = false; - break; - } - key_sizes[j] = key_columns[j]->sizeOfValueIfFixed(); - keys_bytes += key_sizes[j]; - } - - /// If there is one numeric key that fits in 64 bits - if (keys_size == 1 && key_columns[0]->isNumeric()) - { - size_t size_of_field = key_columns[0]->sizeOfValueIfFixed(); - if (size_of_field == 1) - return Type::key8; - if (size_of_field == 2) - return Type::key16; - if (size_of_field == 4) - return Type::key32; - if (size_of_field == 8) - return Type::key64; - if (size_of_field == 16) - return Type::keys128; - if (size_of_field == 32) - return Type::keys256; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); - } - - /// If the keys fit in N bits, we will use a hash table for N-bit-packed keys - if (all_fixed && keys_bytes <= 16) - return Type::keys128; - if (all_fixed && keys_bytes <= 32) - return Type::keys256; - - /// If there is single string key, use hash table of it's values. 
- if (keys_size == 1) - { - auto is_string_column = [](const IColumn * column_ptr) -> bool - { - if (const auto * lc_column_ptr = typeid_cast(column_ptr)) - return typeid_cast(lc_column_ptr->getDictionary().getNestedColumn().get()); - return typeid_cast(column_ptr); - }; - - const auto * key_column = key_columns[0]; - if (is_string_column(key_column) || - (isColumnConst(*key_column) && is_string_column(assert_cast(key_column)->getDataColumnPtr().get()))) - return Type::key_string; - } - - if (keys_size == 1 && typeid_cast(key_columns[0])) - return Type::key_fixed_string; - - /// Otherwise, will use set of cryptographic hashes of unambiguously serialized values. - return Type::hashed; -} - -template -static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes) -{ - if constexpr (is_asof_join) - { - auto key_column_copy = key_columns; - auto key_size_copy = key_sizes; - key_column_copy.pop_back(); - key_size_copy.pop_back(); - return KeyGetter(key_column_copy, key_size_copy, nullptr); - } - else - return KeyGetter(key_columns, key_sizes, nullptr); -} - -template -using FindResultImpl = ColumnsHashing::columns_hashing_impl::FindResultImpl; - -/// Dummy key getter, always find nothing, used for JOIN ON NULL -template -class KeyGetterEmpty -{ -public: - struct MappedType - { - using mapped_type = Mapped; - }; - - using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl; - - KeyGetterEmpty() = default; - - FindResult findKey(MappedType, size_t, const Arena &) { return FindResult(); } -}; - -template -struct KeyGetterForTypeImpl; - -constexpr bool use_offset = true; - -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodOneNumber; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodOneNumber; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodOneNumber; -}; -template struct KeyGetterForTypeImpl -{ - using Type = 
ColumnsHashing::HashMethodOneNumber; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodString; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodFixedString; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodKeysFixed; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodKeysFixed; -}; -template struct KeyGetterForTypeImpl -{ - using Type = ColumnsHashing::HashMethodHashed; -}; - -template -struct KeyGetterForType -{ - using Value = typename Data::value_type; - using Mapped_t = typename Data::mapped_type; - using Mapped = std::conditional_t, const Mapped_t, Mapped_t>; - using Type = typename KeyGetterForTypeImpl::Type; -}; - -void HashJoin::dataMapInit(MapsVariant & map) -{ - if (kind == JoinKind::Cross) - return; - joinDispatchInit(kind, strictness, map); - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.create(data->type); }); - - if (reserve_num) - { - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.reserve(data->type, reserve_num); }); - } - - if (!data) - throw Exception(ErrorCodes::LOGICAL_ERROR, "HashJoin::dataMapInit called with empty data"); -} - -bool HashJoin::empty() const -{ - return data->type == Type::EMPTY; -} - -bool HashJoin::alwaysReturnsEmptySet() const -{ - return isInnerOrRight(getKind()) && data->empty; -} - -size_t HashJoin::getTotalRowCount() const -{ - if (!data) - return 0; - - size_t res = 0; - - if (data->type == Type::CROSS) - { - for (const auto & block : data->blocks) - res += block.rows(); - } - else - { - for (const auto & map : data->maps) - { - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalRowCount(data->type); }); - } - } - - return res; -} - -size_t HashJoin::getTotalByteCount() const -{ - if (!data) - return 0; - -#ifndef NDEBUG - size_t debug_blocks_allocated_size = 0; - for (const auto & block : 
data->blocks) - debug_blocks_allocated_size += block.allocatedBytes(); - - if (data->blocks_allocated_size != debug_blocks_allocated_size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_allocated_size != debug_blocks_allocated_size ({} != {})", - data->blocks_allocated_size, debug_blocks_allocated_size); - - size_t debug_blocks_nullmaps_allocated_size = 0; - for (const auto & nullmap : data->blocks_nullmaps) - debug_blocks_nullmaps_allocated_size += nullmap.second->allocatedBytes(); - - if (data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size ({} != {})", - data->blocks_nullmaps_allocated_size, debug_blocks_nullmaps_allocated_size); -#endif - - size_t res = 0; - - res += data->blocks_allocated_size; - res += data->blocks_nullmaps_allocated_size; - res += data->pool.allocatedBytes(); - - if (data->type != Type::CROSS) - { - for (const auto & map : data->maps) - { - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalByteCountImpl(data->type); }); - } - } - return res; -} - -namespace -{ - /// Inserting an element into a hash table of the form `key -> reference to a string`, which will then be used by JOIN. 
- template - struct Inserter - { - static ALWAYS_INLINE bool insertOne(const HashJoin & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, - Arena & pool) - { - auto emplace_result = key_getter.emplaceKey(map, i, pool); - - if (emplace_result.isInserted() || join.anyTakeLastRow()) - { - new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i); - return true; - } - return false; - } - - static ALWAYS_INLINE void insertAll(const HashJoin &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) - { - auto emplace_result = key_getter.emplaceKey(map, i, pool); - - if (emplace_result.isInserted()) - new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i); - else - { - /// The first element of the list is stored in the value of the hash table, the rest in the pool. - emplace_result.getMapped().insert({stored_block, i}, pool); - } - } - - static ALWAYS_INLINE void insertAsof(HashJoin & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, - const IColumn & asof_column) - { - auto emplace_result = key_getter.emplaceKey(map, i, pool); - typename Map::mapped_type * time_series_map = &emplace_result.getMapped(); - - TypeIndex asof_type = *join.getAsofType(); - if (emplace_result.isInserted()) - time_series_map = new (time_series_map) typename Map::mapped_type(createAsofRowRef(asof_type, join.getAsofInequality())); - (*time_series_map)->insert(asof_column, stored_block, i); - } - }; - - - template - size_t NO_INLINE insertFromBlockImplTypeCase( - HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, - const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) - { - [[maybe_unused]] constexpr bool mapped_one = std::is_same_v; - constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; - - const IColumn * asof_column [[maybe_unused]] = nullptr; - if 
constexpr (is_asof_join) - asof_column = key_columns.back(); - - auto key_getter = createKeyGetter(key_columns, key_sizes); - - /// For ALL and ASOF join always insert values - is_inserted = !mapped_one || is_asof_join; - - for (size_t i = 0; i < rows; ++i) - { - if (null_map && (*null_map)[i]) - { - /// nulls are not inserted into hash table, - /// keep them for RIGHT and FULL joins - is_inserted = true; - continue; - } - - /// Check condition for right table from ON section - if (join_mask && !(*join_mask)[i]) - continue; - - if constexpr (is_asof_join) - Inserter::insertAsof(join, map, key_getter, stored_block, i, pool, *asof_column); - else if constexpr (mapped_one) - is_inserted |= Inserter::insertOne(join, map, key_getter, stored_block, i, pool); - else - Inserter::insertAll(join, map, key_getter, stored_block, i, pool); - } - return map.getBufferSizeInCells(); - } - - template - size_t insertFromBlockImpl( - HashJoin & join, HashJoin::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns, - const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) - { - switch (type) - { - case HashJoin::Type::EMPTY: - [[fallthrough]]; - case HashJoin::Type::CROSS: - /// Do nothing. 
We will only save block, and it is enough - is_inserted = true; - return 0; - - #define M(TYPE) \ - case HashJoin::Type::TYPE: \ - return insertFromBlockImplTypeCase>::Type>(\ - join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \ - break; - - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - } - } -} - -void HashJoin::initRightBlockStructure(Block & saved_block_sample) -{ - if (isCrossOrComma(kind)) - { - /// cross join doesn't have keys, just add all columns - saved_block_sample = sample_block_with_columns_to_add.cloneEmpty(); - return; - } - - bool multiple_disjuncts = !table_join->oneDisjunct(); - /// We could remove key columns for LEFT | INNER HashJoin but we should keep them for JoinSwitcher (if any). - bool save_key_columns = table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO) || - table_join->isEnabledAlgorithm(JoinAlgorithm::GRACE_HASH) || - isRightOrFull(kind) || - multiple_disjuncts || - table_join->getMixedJoinExpression(); - if (save_key_columns) - { - saved_block_sample = right_table_keys.cloneEmpty(); - } - else if (strictness == JoinStrictness::Asof) - { - /// Save ASOF key - saved_block_sample.insert(right_table_keys.safeGetByPosition(right_table_keys.columns() - 1)); - } - - /// Save non key columns - for (auto & column : sample_block_with_columns_to_add) - { - if (auto * col = saved_block_sample.findByName(column.name)) - *col = column; - else - saved_block_sample.insert(column); - } -} - -Block HashJoin::prepareRightBlock(const Block & block, const Block & saved_block_sample_) -{ - Block structured_block; - for (const auto & sample_column : saved_block_sample_.getColumnsWithTypeAndName()) - { - ColumnWithTypeAndName column = block.getByName(sample_column.name); - - /// There's no optimization for right side const columns. Remove constness if any. 
- column.column = recursiveRemoveSparse(column.column->convertToFullColumnIfConst()); - - if (column.column->lowCardinality() && !sample_column.column->lowCardinality()) - { - column.column = column.column->convertToFullColumnIfLowCardinality(); - column.type = removeLowCardinality(column.type); - } - - if (sample_column.column->isNullable()) - JoinCommon::convertColumnToNullable(column); - - structured_block.insert(std::move(column)); - } - - return structured_block; -} - -Block HashJoin::prepareRightBlock(const Block & block) const -{ - return prepareRightBlock(block, savedBlockSample()); -} - -bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) -{ - if (!data) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Join data was released"); - - /// RowRef::SizeT is uint32_t (not size_t) for hash table Cell memory efficiency. - /// It's possible to split bigger blocks and insert them by parts here. But it would be a dead code. - if (unlikely(source_block_.rows() > std::numeric_limits::max())) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Too many rows in right table block for HashJoin: {}", source_block_.rows()); - - /** We do not allocate memory for stored blocks inside HashJoin, only for hash table. - * In case when we have all the blocks allocated before the first `addBlockToJoin` call, will already be quite high. - * In that case memory consumed by stored blocks will be underestimated. - */ - if (!memory_usage_before_adding_blocks) - memory_usage_before_adding_blocks = getCurrentQueryMemoryUsage(); - - Block source_block = source_block_; - if (strictness == JoinStrictness::Asof) - { - chassert(kind == JoinKind::Left || kind == JoinKind::Inner); - - /// Filter out rows with NULLs in ASOF key, nulls are not joined with anything since they are not comparable - /// We support only INNER/LEFT ASOF join, so rows with NULLs never return from the right joined table. - /// So filter them out here not to handle in implementation. 
- const auto & asof_key_name = table_join->getOnlyClause().key_names_right.back(); - auto & asof_column = source_block.getByName(asof_key_name); - - if (asof_column.type->isNullable()) - { - /// filter rows with nulls in asof key - if (const auto * asof_const_column = typeid_cast(asof_column.column.get())) - { - if (asof_const_column->isNullAt(0)) - return false; - } - else - { - const auto & asof_column_nullable = assert_cast(*asof_column.column).getNullMapData(); - - NullMap negative_null_map(asof_column_nullable.size()); - for (size_t i = 0; i < asof_column_nullable.size(); ++i) - negative_null_map[i] = !asof_column_nullable[i]; - - for (auto & column : source_block) - column.column = column.column->filter(negative_null_map, -1); - } - } - } - - size_t rows = source_block.rows(); - - const auto & right_key_names = table_join->getAllNames(JoinTableSide::Right); - ColumnPtrMap all_key_columns(right_key_names.size()); - for (const auto & column_name : right_key_names) - { - const auto & column = source_block.getByName(column_name).column; - all_key_columns[column_name] = recursiveRemoveSparse(column->convertToFullColumnIfConst())->convertToFullColumnIfLowCardinality(); - } - - Block block_to_save = prepareRightBlock(source_block); - if (shrink_blocks) - block_to_save = block_to_save.shrinkToFit(); - - size_t max_bytes_in_join = table_join->sizeLimits().max_bytes; - size_t max_rows_in_join = table_join->sizeLimits().max_rows; - - if (kind == JoinKind::Cross && tmp_data - && (tmp_stream || (max_bytes_in_join && getTotalByteCount() + block_to_save.allocatedBytes() >= max_bytes_in_join) - || (max_rows_in_join && getTotalRowCount() + block_to_save.rows() >= max_rows_in_join))) - { - if (tmp_stream == nullptr) - { - tmp_stream = &tmp_data->createStream(right_sample_block); - } - tmp_stream->write(block_to_save); - return true; - } - - size_t total_rows = 0; - size_t total_bytes = 0; - { - if (storage_join_lock) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, 
"addBlockToJoin called when HashJoin locked to prevent updates"); - - assertBlocksHaveEqualStructure(data->sample_block, block_to_save, "joined block"); - - size_t min_bytes_to_compress = table_join->crossJoinMinBytesToCompress(); - size_t min_rows_to_compress = table_join->crossJoinMinRowsToCompress(); - - if (kind == JoinKind::Cross - && ((min_bytes_to_compress && getTotalByteCount() >= min_bytes_to_compress) - || (min_rows_to_compress && getTotalRowCount() >= min_rows_to_compress))) - { - block_to_save = block_to_save.compress(); - } - - data->blocks_allocated_size += block_to_save.allocatedBytes(); - data->blocks.emplace_back(std::move(block_to_save)); - Block * stored_block = &data->blocks.back(); - - if (rows) - data->empty = false; - - bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); - const auto & onexprs = table_join->getClauses(); - for (size_t onexpr_idx = 0; onexpr_idx < onexprs.size(); ++onexpr_idx) - { - ColumnRawPtrs key_columns; - for (const auto & name : onexprs[onexpr_idx].key_names_right) - key_columns.push_back(all_key_columns[name].get()); - - /// We will insert to the map only keys, where all components are not NULL. 
- ConstNullMapPtr null_map{}; - ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); - - /// If RIGHT or FULL save blocks with nulls for NotJoinedBlocks - UInt8 save_nullmap = 0; - if (isRightOrFull(kind) && null_map) - { - /// Save rows with NULL keys - for (size_t i = 0; !save_nullmap && i < null_map->size(); ++i) - save_nullmap |= (*null_map)[i]; - } - - auto join_mask_col = JoinCommon::getColumnAsMask(source_block, onexprs[onexpr_idx].condColumnNames().second); - /// Save blocks that do not hold conditions in ON section - ColumnUInt8::MutablePtr not_joined_map = nullptr; - if (!flag_per_row && isRightOrFull(kind) && join_mask_col.hasData()) - { - const auto & join_mask = join_mask_col.getData(); - /// Save rows that do not hold conditions - not_joined_map = ColumnUInt8::create(rows, 0); - for (size_t i = 0, sz = join_mask->size(); i < sz; ++i) - { - /// Condition hold, do not save row - if ((*join_mask)[i]) - continue; - - /// NULL key will be saved anyway because, do not save twice - if (save_nullmap && (*null_map)[i]) - continue; - - not_joined_map->getData()[i] = 1; - } - } - - bool is_inserted = false; - if (kind != JoinKind::Cross) - { - joinDispatch(kind, strictness, data->maps[onexpr_idx], [&](auto kind_, auto strictness_, auto & map) - { - size_t size = insertFromBlockImpl( - *this, data->type, map, rows, key_columns, key_sizes[onexpr_idx], stored_block, null_map, - /// If mask is false constant, rows are added to hashmap anyway. 
It's not a happy-flow, so this case is not optimized - join_mask_col.getData(), - data->pool, is_inserted); - - if (flag_per_row) - used_flags.reinit(stored_block); - else if (is_inserted) - /// Number of buckets + 1 value from zero storage - used_flags.reinit(size + 1); - }); - } - - if (!flag_per_row && save_nullmap && is_inserted) - { - data->blocks_nullmaps_allocated_size += null_map_holder->allocatedBytes(); - data->blocks_nullmaps.emplace_back(stored_block, null_map_holder); - } - - if (!flag_per_row && not_joined_map && is_inserted) - { - data->blocks_nullmaps_allocated_size += not_joined_map->allocatedBytes(); - data->blocks_nullmaps.emplace_back(stored_block, std::move(not_joined_map)); - } - - if (!flag_per_row && !is_inserted) - { - LOG_TRACE(log, "Skipping inserting block with {} rows", rows); - data->blocks_allocated_size -= stored_block->allocatedBytes(); - data->blocks.pop_back(); - } - - if (!check_limits) - return true; - - /// TODO: Do not calculate them every time - total_rows = getTotalRowCount(); - total_bytes = getTotalByteCount(); - } - } - - shrinkStoredBlocksToFit(total_bytes); - - return table_join->sizeLimits().check(total_rows, total_bytes, "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); -} - -void HashJoin::shrinkStoredBlocksToFit(size_t & total_bytes_in_join) -{ - if (shrink_blocks) - return; /// Already shrunk - - Int64 current_memory_usage = getCurrentQueryMemoryUsage(); - Int64 query_memory_usage_delta = current_memory_usage - memory_usage_before_adding_blocks; - Int64 max_total_bytes_for_query = memory_usage_before_adding_blocks ? 
table_join->getMaxMemoryUsage() : 0; - - auto max_total_bytes_in_join = table_join->sizeLimits().max_bytes; - - /** If accounted data size is more than half of `max_bytes_in_join` - * or query memory consumption growth from the beginning of adding blocks (estimation of memory consumed by join using memory tracker) - * is bigger than half of all memory available for query, - * then shrink stored blocks to fit. - */ - shrink_blocks = (max_total_bytes_in_join && total_bytes_in_join > max_total_bytes_in_join / 2) || - (max_total_bytes_for_query && query_memory_usage_delta > max_total_bytes_for_query / 2); - if (!shrink_blocks) - return; - - LOG_DEBUG(log, "Shrinking stored blocks, memory consumption is {} {} calculated by join, {} {} by memory tracker", - ReadableSize(total_bytes_in_join), max_total_bytes_in_join ? fmt::format("/ {}", ReadableSize(max_total_bytes_in_join)) : "", - ReadableSize(query_memory_usage_delta), max_total_bytes_for_query ? fmt::format("/ {}", ReadableSize(max_total_bytes_for_query)) : ""); - - for (auto & stored_block : data->blocks) - { - size_t old_size = stored_block.allocatedBytes(); - stored_block = stored_block.shrinkToFit(); - size_t new_size = stored_block.allocatedBytes(); - - if (old_size >= new_size) - { - if (data->blocks_allocated_size < old_size - new_size) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Blocks allocated size value is broken: " - "blocks_allocated_size = {}, old_size = {}, new_size = {}", - data->blocks_allocated_size, old_size, new_size); - - data->blocks_allocated_size -= old_size - new_size; - } - else - /// Sometimes after clone resized block can be bigger than original - data->blocks_allocated_size += new_size - old_size; - } - - auto new_total_bytes_in_join = getTotalByteCount(); - - Int64 new_current_memory_usage = getCurrentQueryMemoryUsage(); - - LOG_DEBUG(log, "Shrunk stored blocks {} freed ({} by memory tracker), new memory consumption is {} ({} by memory tracker)", - ReadableSize(total_bytes_in_join - 
new_total_bytes_in_join), ReadableSize(current_memory_usage - new_current_memory_usage), - ReadableSize(new_total_bytes_in_join), ReadableSize(new_current_memory_usage)); - - total_bytes_in_join = new_total_bytes_in_join; -} - - -namespace -{ - -struct JoinOnKeyColumns -{ - Names key_names; - - Columns materialized_keys_holder; - ColumnRawPtrs key_columns; - - ConstNullMapPtr null_map; - ColumnPtr null_map_holder; - - /// Only rows where mask == true can be joined - JoinCommon::JoinMask join_mask_column; - - Sizes key_sizes; - - explicit JoinOnKeyColumns(const Block & block, const Names & key_names_, const String & cond_column_name, const Sizes & key_sizes_) - : key_names(key_names_) - , materialized_keys_holder(JoinCommon::materializeColumns(block, key_names)) /// Rare case, when keys are constant or low cardinality. To avoid code bloat, simply materialize them. - , key_columns(JoinCommon::getRawPointers(materialized_keys_holder)) - , null_map(nullptr) - , null_map_holder(extractNestedColumnsAndNullMap(key_columns, null_map)) - , join_mask_column(JoinCommon::getColumnAsMask(block, cond_column_name)) - , key_sizes(key_sizes_) - { - } - - bool isRowFiltered(size_t i) const { return join_mask_column.isRowFiltered(i); } -}; - -template -class AddedColumns -{ -public: - struct TypeAndName - { - DataTypePtr type; - String name; - String qualified_name; - - TypeAndName(DataTypePtr type_, const String & name_, const String & qualified_name_) - : type(type_), name(name_), qualified_name(qualified_name_) - { - } - }; - - struct LazyOutput - { - PaddedPODArray blocks; - PaddedPODArray row_nums; - }; - - AddedColumns( - const Block & left_block_, - const Block & block_with_columns_to_add, - const Block & saved_block_sample, - const HashJoin & join, - std::vector && join_on_keys_, - ExpressionActionsPtr additional_filter_expression_, - bool is_asof_join, - bool is_join_get_) - : left_block(left_block_) - , join_on_keys(join_on_keys_) - , 
additional_filter_expression(additional_filter_expression_) - , rows_to_add(left_block.rows()) - , is_join_get(is_join_get_) - { - size_t num_columns_to_add = block_with_columns_to_add.columns(); - if (is_asof_join) - ++num_columns_to_add; - - if constexpr (lazy) - { - has_columns_to_add = num_columns_to_add > 0; - lazy_output.blocks.reserve(rows_to_add); - lazy_output.row_nums.reserve(rows_to_add); - } - - columns.reserve(num_columns_to_add); - type_name.reserve(num_columns_to_add); - right_indexes.reserve(num_columns_to_add); - - for (const auto & src_column : block_with_columns_to_add) - { - /// Column names `src_column.name` and `qualified_name` can differ for StorageJoin, - /// because it uses not qualified right block column names - auto qualified_name = join.getTableJoin().renamedRightColumnName(src_column.name); - /// Don't insert column if it's in left block - if (!left_block.has(qualified_name)) - addColumn(src_column, qualified_name); - } - - if (is_asof_join) - { - assert(join_on_keys.size() == 1); - const ColumnWithTypeAndName & right_asof_column = join.rightAsofKeyColumn(); - addColumn(right_asof_column, right_asof_column.name); - left_asof_key = join_on_keys[0].key_columns.back(); - } - - for (auto & tn : type_name) - right_indexes.push_back(saved_block_sample.getPositionByName(tn.name)); - - nullable_column_ptrs.resize(right_indexes.size(), nullptr); - for (size_t j = 0; j < right_indexes.size(); ++j) - { - /** If it's joinGetOrNull, we will have nullable columns in result block - * even if right column is not nullable in storage (saved_block_sample). 
- */ - const auto & saved_column = saved_block_sample.getByPosition(right_indexes[j]).column; - if (columns[j]->isNullable() && !saved_column->isNullable()) - nullable_column_ptrs[j] = typeid_cast(columns[j].get()); - } - } - - size_t size() const { return columns.size(); } - - void buildOutput(); - - ColumnWithTypeAndName moveColumn(size_t i) - { - return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].type, type_name[i].qualified_name); - } - - void appendFromBlock(const Block & block, size_t row_num, bool has_default); - - void appendDefaultRow(); - - void applyLazyDefaults(); - - const IColumn & leftAsofKey() const { return *left_asof_key; } - - Block left_block; - std::vector join_on_keys; - ExpressionActionsPtr additional_filter_expression; - - size_t max_joined_block_rows = 0; - size_t rows_to_add; - std::unique_ptr offsets_to_replicate; - bool need_filter = false; - IColumn::Filter filter; - - void reserve(bool need_replicate) - { - if (!max_joined_block_rows) - return; - - /// Do not allow big allocations when user set max_joined_block_rows to huge value - size_t reserve_size = std::min(max_joined_block_rows, DEFAULT_BLOCK_SIZE * 2); - - if (need_replicate) - /// Reserve 10% more space for columns, because some rows can be repeated - reserve_size = static_cast(1.1 * reserve_size); - - for (auto & column : columns) - column->reserve(reserve_size); - } - -private: - - void checkBlock(const Block & block) - { - for (size_t j = 0; j < right_indexes.size(); ++j) - { - const auto * column_from_block = block.getByPosition(right_indexes[j]).column.get(); - const auto * dest_column = columns[j].get(); - if (auto * nullable_col = nullable_column_ptrs[j]) - { - if (!is_join_get) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Columns {} and {} can have different nullability only in joinGetOrNull", - dest_column->getName(), column_from_block->getName()); - dest_column = nullable_col->getNestedColumnPtr().get(); - } - /** Using 
dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, - * because dictionaries can be different, while calling insertFrom on them is safe, for example: - * ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1))) - * and - * ColumnLowCardinality(size = 0, UInt16(size = 0), ColumnUnique(size = 1, String(size = 1))) - */ - if (typeid(*dest_column) != typeid(*column_from_block)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", - dest_column->getName(), column_from_block->getName(), - demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); - } - } - - MutableColumns columns; - bool is_join_get; - std::vector right_indexes; - std::vector type_name; - std::vector nullable_column_ptrs; - size_t lazy_defaults_count = 0; - - /// for lazy - // The default row is represented by an empty RowRef, so that fixed-size blocks can be generated sequentially, - // default_count cannot represent the position of the row - LazyOutput lazy_output; - bool has_columns_to_add; - - /// for ASOF - const IColumn * left_asof_key = nullptr; - - - void addColumn(const ColumnWithTypeAndName & src_column, const std::string & qualified_name) - { - columns.push_back(src_column.column->cloneEmpty()); - columns.back()->reserve(src_column.column->size()); - type_name.emplace_back(src_column.type, src_column.name, qualified_name); - } -}; -template<> void AddedColumns::buildOutput() -{ -} - -template<> -void AddedColumns::buildOutput() -{ - for (size_t i = 0; i < this->size(); ++i) - { - auto& col = columns[i]; - size_t default_count = 0; - auto apply_default = [&]() - { - if (default_count > 0) - { - JoinCommon::addDefaultValues(*col, type_name[i].type, default_count); - default_count = 0; - } - }; - - for (size_t j = 0; j < lazy_output.blocks.size(); ++j) - { - if (!lazy_output.blocks[j]) - { - default_count++; - continue; - } - apply_default(); - const auto 
& column_from_block = reinterpret_cast(lazy_output.blocks[j])->getByPosition(right_indexes[i]); - /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin. - if (is_join_get) - { - if (auto * nullable_col = typeid_cast(col.get()); - nullable_col && !column_from_block.column->isNullable()) - { - nullable_col->insertFromNotNullable(*column_from_block.column, lazy_output.row_nums[j]); - continue; - } - } - col->insertFrom(*column_from_block.column, lazy_output.row_nums[j]); - } - apply_default(); - } -} - -template<> -void AddedColumns::applyLazyDefaults() -{ - if (lazy_defaults_count) - { - for (size_t j = 0, size = right_indexes.size(); j < size; ++j) - JoinCommon::addDefaultValues(*columns[j], type_name[j].type, lazy_defaults_count); - lazy_defaults_count = 0; - } -} - -template<> -void AddedColumns::applyLazyDefaults() -{ -} - -template <> -void AddedColumns::appendFromBlock(const Block & block, size_t row_num,const bool has_defaults) -{ - if (has_defaults) - applyLazyDefaults(); - -#ifndef NDEBUG - checkBlock(block); -#endif - if (is_join_get) - { - size_t right_indexes_size = right_indexes.size(); - for (size_t j = 0; j < right_indexes_size; ++j) - { - const auto & column_from_block = block.getByPosition(right_indexes[j]); - if (auto * nullable_col = nullable_column_ptrs[j]) - nullable_col->insertFromNotNullable(*column_from_block.column, row_num); - else - columns[j]->insertFrom(*column_from_block.column, row_num); - } - } - else - { - size_t right_indexes_size = right_indexes.size(); - for (size_t j = 0; j < right_indexes_size; ++j) - { - const auto & column_from_block = block.getByPosition(right_indexes[j]); - columns[j]->insertFrom(*column_from_block.column, row_num); - } - } -} - -template <> -void AddedColumns::appendFromBlock(const Block & block, size_t row_num, bool) -{ -#ifndef NDEBUG - checkBlock(block); -#endif - if (has_columns_to_add) - { - lazy_output.blocks.emplace_back(reinterpret_cast(&block)); - 
lazy_output.row_nums.emplace_back(static_cast(row_num)); - } -} -template<> -void AddedColumns::appendDefaultRow() -{ - ++lazy_defaults_count; -} - -template<> -void AddedColumns::appendDefaultRow() -{ - if (has_columns_to_add) - { - lazy_output.blocks.emplace_back(0); - lazy_output.row_nums.emplace_back(0); - } -} - -template -struct JoinFeatures -{ - static constexpr bool is_any_join = STRICTNESS == JoinStrictness::Any; - static constexpr bool is_any_or_semi_join = STRICTNESS == JoinStrictness::Any || STRICTNESS == JoinStrictness::RightAny || (STRICTNESS == JoinStrictness::Semi && KIND == JoinKind::Left); - static constexpr bool is_all_join = STRICTNESS == JoinStrictness::All; - static constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; - static constexpr bool is_semi_join = STRICTNESS == JoinStrictness::Semi; - static constexpr bool is_anti_join = STRICTNESS == JoinStrictness::Anti; - - static constexpr bool left = KIND == JoinKind::Left; - static constexpr bool right = KIND == JoinKind::Right; - static constexpr bool inner = KIND == JoinKind::Inner; - static constexpr bool full = KIND == JoinKind::Full; - - static constexpr bool need_replication = is_all_join || (is_any_join && right) || (is_semi_join && right); - static constexpr bool need_filter = !need_replication && (inner || right || (is_semi_join && left) || (is_anti_join && left)); - static constexpr bool add_missing = (left || full) && !is_semi_join; - - static constexpr bool need_flags = MapGetter::flagged; -}; - -template -class KnownRowsHolder; - -/// Keep already joined rows to prevent duplication if many disjuncts -/// if for a particular pair of rows condition looks like TRUE or TRUE or TRUE -/// we want to have it once in resultset -template<> -class KnownRowsHolder -{ -public: - using Type = std::pair; - -private: - static const size_t MAX_LINEAR = 16; // threshold to switch from Array to Set - using ArrayHolder = std::array; - using SetHolder = std::set; - using SetHolderPtr = 
std::unique_ptr; - - ArrayHolder array_holder; - SetHolderPtr set_holder_ptr; - - size_t items; - -public: - KnownRowsHolder() - : items(0) - { - } - - - template - void add(InputIt from, InputIt to) - { - const size_t new_items = std::distance(from, to); - if (items + new_items <= MAX_LINEAR) - { - std::copy(from, to, &array_holder[items]); - } - else - { - if (items <= MAX_LINEAR) - { - set_holder_ptr = std::make_unique(); - set_holder_ptr->insert(std::cbegin(array_holder), std::cbegin(array_holder) + items); - } - set_holder_ptr->insert(from, to); - } - items += new_items; - } - - template - bool isKnown(const Needle & needle) - { - return items <= MAX_LINEAR - ? std::find(std::cbegin(array_holder), std::cbegin(array_holder) + items, needle) != std::cbegin(array_holder) + items - : set_holder_ptr->find(needle) != set_holder_ptr->end(); - } -}; - -template<> -class KnownRowsHolder -{ -public: - template - void add(InputIt, InputIt) - { - } - - template - static bool isKnown(const Needle &) - { - return false; - } -}; - -template -void addFoundRowAll( - const typename Map::mapped_type & mapped, - AddedColumns & added, - IColumn::Offset & current_offset, - KnownRowsHolder & known_rows [[maybe_unused]], - JoinStuff::JoinUsedFlags * used_flags [[maybe_unused]]) -{ - if constexpr (add_missing) - added.applyLazyDefaults(); - - if constexpr (flag_per_row) - { - std::unique_ptr::Type>> new_known_rows_ptr; - - for (auto it = mapped.begin(); it.ok(); ++it) - { - if (!known_rows.isKnown(std::make_pair(it->block, it->row_num))) - { - added.appendFromBlock(*it->block, it->row_num, false); - ++current_offset; - if (!new_known_rows_ptr) - { - new_known_rows_ptr = std::make_unique::Type>>(); - } - new_known_rows_ptr->push_back(std::make_pair(it->block, it->row_num)); - if (used_flags) - { - used_flags->JoinStuff::JoinUsedFlags::setUsedOnce( - FindResultImpl(*it, true, 0)); - } - } - } - - if (new_known_rows_ptr) - { - known_rows.add(std::cbegin(*new_known_rows_ptr), 
std::cend(*new_known_rows_ptr)); - } - } - else - { - for (auto it = mapped.begin(); it.ok(); ++it) - { - added.appendFromBlock(*it->block, it->row_num, false); - ++current_offset; - } - } -} - -template -void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]]) -{ - if constexpr (add_missing) - { - added.appendDefaultRow(); - if constexpr (need_offset) - ++current_offset; - } -} - -template -void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]]) -{ - if constexpr (need_filter) - filter[pos] = 1; -} - -template -ColumnPtr buildAdditionalFilter( - size_t left_start_row, - const std::vector & selected_rows, - const std::vector & row_replicate_offset, - AddedColumns & added_columns) -{ - ColumnPtr result_column; - do - { - if (selected_rows.empty()) - { - result_column = ColumnUInt8::create(); - break; - } - const Block & sample_right_block = *selected_rows.begin()->block; - if (!sample_right_block || !added_columns.additional_filter_expression) - { - auto filter = ColumnUInt8::create(); - filter->insertMany(1, selected_rows.size()); - result_column = std::move(filter); - break; - } - - auto required_cols = added_columns.additional_filter_expression->getRequiredColumnsWithTypes(); - if (required_cols.empty()) - { - Block block; - added_columns.additional_filter_expression->execute(block); - result_column = block.getByPosition(0).column->cloneResized(selected_rows.size()); - break; - } - NameSet required_column_names; - for (auto & col : required_cols) - required_column_names.insert(col.name); - - Block executed_block; - size_t right_col_pos = 0; - for (const auto & col : sample_right_block.getColumnsWithTypeAndName()) - { - if (required_column_names.contains(col.name)) - { - auto new_col = col.column->cloneEmpty(); - for (const auto & selected_row : selected_rows) - { - const auto & src_col = selected_row.block->getByPosition(right_col_pos); - new_col->insertFrom(*src_col.column, 
selected_row.row_num); - } - executed_block.insert({std::move(new_col), col.type, col.name}); - } - right_col_pos += 1; - } - if (!executed_block) - { - result_column = ColumnUInt8::create(); - break; - } - - for (const auto & col_name : required_column_names) - { - const auto * src_col = added_columns.left_block.findByName(col_name); - if (!src_col) - continue; - auto new_col = src_col->column->cloneEmpty(); - size_t prev_left_offset = 0; - for (size_t i = 1; i < row_replicate_offset.size(); ++i) - { - const size_t & left_offset = row_replicate_offset[i]; - size_t rows = left_offset - prev_left_offset; - if (rows) - new_col->insertManyFrom(*src_col->column, left_start_row + i - 1, rows); - prev_left_offset = left_offset; - } - executed_block.insert({std::move(new_col), src_col->type, col_name}); - } - if (!executed_block) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "required columns: [{}], but not found any in left/right table. right table: {}, left table: {}", - required_cols.toString(), - sample_right_block.dumpNames(), - added_columns.left_block.dumpNames()); - } - - for (const auto & col : executed_block.getColumnsWithTypeAndName()) - if (!col.column || !col.type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal nullptr column in input block: {}", executed_block.dumpStructure()); - - added_columns.additional_filter_expression->execute(executed_block); - result_column = executed_block.getByPosition(0).column->convertToFullColumnIfConst(); - executed_block.clear(); - } while (false); - - result_column = result_column->convertToFullIfNeeded(); - if (result_column->isNullable()) - { - /// Convert Nullable(UInt8) to UInt8 ensuring that nulls are zeros - /// Trying to avoid copying data, since we are the only owner of the column. 
- ColumnPtr mask_column = assert_cast(*result_column).getNullMapColumnPtr(); - - MutableColumnPtr mutable_column; - { - ColumnPtr nested_column = assert_cast(*result_column).getNestedColumnPtr(); - result_column.reset(); - mutable_column = IColumn::mutate(std::move(nested_column)); - } - - auto & column_data = assert_cast(*mutable_column).getData(); - const auto & mask_column_data = assert_cast(*mask_column).getData(); - for (size_t i = 0; i < column_data.size(); ++i) - { - if (mask_column_data[i]) - column_data[i] = 0; - } - return mutable_column; - } - return result_column; -} - -/// Adapter class to pass into addFoundRowAll -/// In joinRightColumnsWithAdditionalFilter we don't want to add rows directly into AddedColumns, -/// because they need to be filtered by additional_filter_expression. -class PreSelectedRows : public std::vector -{ -public: - void appendFromBlock(const Block & block, size_t row_num, bool /* has_default */) { this->emplace_back(&block, row_num); } -}; - -/// First to collect all matched rows refs by join keys, then filter out rows which are not true in additional filter expression. 
-template < - typename KeyGetter, - typename Map, - bool need_replication, - typename AddedColumns> -NO_INLINE size_t joinRightColumnsWithAddtitionalFilter( - std::vector && key_getter_vector, - const std::vector & mapv, - AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]], - bool need_filter [[maybe_unused]], - bool need_flags [[maybe_unused]], - bool add_missing [[maybe_unused]], - bool flag_per_row [[maybe_unused]]) -{ - size_t left_block_rows = added_columns.rows_to_add; - if (need_filter) - added_columns.filter = IColumn::Filter(left_block_rows, 0); - - std::unique_ptr pool; - - if constexpr (need_replication) - added_columns.offsets_to_replicate = std::make_unique(left_block_rows); - - std::vector row_replicate_offset; - row_replicate_offset.reserve(left_block_rows); - - using FindResult = typename KeyGetter::FindResult; - size_t max_joined_block_rows = added_columns.max_joined_block_rows; - size_t left_row_iter = 0; - PreSelectedRows selected_rows; - selected_rows.reserve(left_block_rows); - std::vector find_results; - find_results.reserve(left_block_rows); - bool exceeded_max_block_rows = false; - IColumn::Offset total_added_rows = 0; - IColumn::Offset current_added_rows = 0; - - auto collect_keys_matched_rows_refs = [&]() - { - pool = std::make_unique(); - find_results.clear(); - row_replicate_offset.clear(); - row_replicate_offset.push_back(0); - current_added_rows = 0; - selected_rows.clear(); - for (; left_row_iter < left_block_rows; ++left_row_iter) - { - if constexpr (need_replication) - { - if (unlikely(total_added_rows + current_added_rows >= max_joined_block_rows)) - { - break; - } - } - KnownRowsHolder all_flag_known_rows; - KnownRowsHolder single_flag_know_rows; - for (size_t join_clause_idx = 0; join_clause_idx < added_columns.join_on_keys.size(); ++join_clause_idx) - { - const auto & join_keys = added_columns.join_on_keys[join_clause_idx]; - if (join_keys.null_map && (*join_keys.null_map)[left_row_iter]) - 
continue; - - bool row_acceptable = !join_keys.isRowFiltered(left_row_iter); - auto find_result = row_acceptable - ? key_getter_vector[join_clause_idx].findKey(*(mapv[join_clause_idx]), left_row_iter, *pool) - : FindResult(); - - if (find_result.isFound()) - { - auto & mapped = find_result.getMapped(); - find_results.push_back(find_result); - if (flag_per_row) - addFoundRowAll(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr); - else - addFoundRowAll(mapped, selected_rows, current_added_rows, single_flag_know_rows, nullptr); - } - } - row_replicate_offset.push_back(current_added_rows); - } - }; - - auto copy_final_matched_rows = [&](size_t left_start_row, ColumnPtr filter_col) - { - const PaddedPODArray & filter_flags = assert_cast(*filter_col).getData(); - - size_t prev_replicated_row = 0; - auto selected_right_row_it = selected_rows.begin(); - size_t find_result_index = 0; - for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i) - { - bool any_matched = false; - /// For all right join, flag_per_row is true, we need mark used flags for each row. 
- if (flag_per_row) - { - for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) - { - if (filter_flags[replicated_row]) - { - any_matched = true; - added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing); - total_added_rows += 1; - if (need_flags) - used_flags.template setUsed(selected_right_row_it->block, selected_right_row_it->row_num, 0); - } - ++selected_right_row_it; - } - } - else - { - for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) - { - if (filter_flags[replicated_row]) - { - any_matched = true; - added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing); - total_added_rows += 1; - } - ++selected_right_row_it; - } - } - if (!any_matched) - { - if (add_missing) - addNotFoundRow(added_columns, total_added_rows); - else - addNotFoundRow(added_columns, total_added_rows); - } - else - { - if (!flag_per_row && need_flags) - used_flags.template setUsed(find_results[find_result_index]); - if (need_filter) - setUsed(added_columns.filter, left_start_row + i - 1); - if (add_missing) - added_columns.applyLazyDefaults(); - } - find_result_index += (prev_replicated_row != row_replicate_offset[i]); - - if constexpr (need_replication) - { - (*added_columns.offsets_to_replicate)[left_start_row + i - 1] = total_added_rows; - } - prev_replicated_row = row_replicate_offset[i]; - } - }; - - while (left_row_iter < left_block_rows && !exceeded_max_block_rows) - { - auto left_start_row = left_row_iter; - collect_keys_matched_rows_refs(); - if (selected_rows.size() != current_added_rows || row_replicate_offset.size() != left_row_iter - left_start_row + 1) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Sizes are mismatched. 
selected_rows.size:{}, current_added_rows:{}, row_replicate_offset.size:{}, left_row_iter: {}, " - "left_start_row: {}", - selected_rows.size(), - current_added_rows, - row_replicate_offset.size(), - left_row_iter, - left_start_row); - } - auto filter_col = buildAdditionalFilter(left_start_row, selected_rows, row_replicate_offset, added_columns); - copy_final_matched_rows(left_start_row, filter_col); - - if constexpr (need_replication) - { - // Add a check for current_added_rows to avoid run the filter expression on too small size batch. - if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024) - { - exceeded_max_block_rows = true; - } - } - } - - if constexpr (need_replication) - { - added_columns.offsets_to_replicate->resize_assume_reserved(left_row_iter); - added_columns.filter.resize_assume_reserved(left_row_iter); - } - added_columns.applyLazyDefaults(); - return left_row_iter; -} - -/// Joins right table columns which indexes are present in right_indexes using specified map. -/// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). 
-template -NO_INLINE size_t joinRightColumns( - std::vector && key_getter_vector, - const std::vector & mapv, - AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]]) -{ - constexpr JoinFeatures join_features; - - size_t rows = added_columns.rows_to_add; - if constexpr (need_filter) - added_columns.filter = IColumn::Filter(rows, 0); - - Arena pool; - - if constexpr (join_features.need_replication) - added_columns.offsets_to_replicate = std::make_unique(rows); - - IColumn::Offset current_offset = 0; - size_t max_joined_block_rows = added_columns.max_joined_block_rows; - size_t i = 0; - for (; i < rows; ++i) - { - if constexpr (join_features.need_replication) - { - if (unlikely(current_offset >= max_joined_block_rows)) - { - added_columns.offsets_to_replicate->resize_assume_reserved(i); - added_columns.filter.resize_assume_reserved(i); - break; - } - } - - bool right_row_found = false; - - KnownRowsHolder known_rows; - for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) - { - const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; - if (join_keys.null_map && (*join_keys.null_map)[i]) - continue; - - bool row_acceptable = !join_keys.isRowFiltered(i); - using FindResult = typename KeyGetter::FindResult; - auto find_result = row_acceptable ? 
key_getter_vector[onexpr_idx].findKey(*(mapv[onexpr_idx]), i, pool) : FindResult(); - - if (find_result.isFound()) - { - right_row_found = true; - auto & mapped = find_result.getMapped(); - if constexpr (join_features.is_asof_join) - { - const IColumn & left_asof_key = added_columns.leftAsofKey(); - - auto row_ref = mapped->findAsof(left_asof_key, i); - if (row_ref.block) - { - setUsed(added_columns.filter, i); - if constexpr (flag_per_row) - used_flags.template setUsed(row_ref.block, row_ref.row_num, 0); - else - used_flags.template setUsed(find_result); - - added_columns.appendFromBlock(*row_ref.block, row_ref.row_num, join_features.add_missing); - } - else - addNotFoundRow(added_columns, current_offset); - } - else if constexpr (join_features.is_all_join) - { - setUsed(added_columns.filter, i); - used_flags.template setUsed(find_result); - auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; - addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); - } - else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right) - { - /// Use first appeared left key + it needs left columns replication - bool used_once = used_flags.template setUsedOnce(find_result); - if (used_once) - { - auto used_flags_opt = join_features.need_flags ? 
&used_flags : nullptr; - setUsed(added_columns.filter, i); - addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); - } - } - else if constexpr (join_features.is_any_join && KIND == JoinKind::Inner) - { - bool used_once = used_flags.template setUsedOnce(find_result); - - /// Use first appeared left key only - if (used_once) - { - setUsed(added_columns.filter, i); - added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); - } - - break; - } - else if constexpr (join_features.is_any_join && join_features.full) - { - /// TODO - } - else if constexpr (join_features.is_anti_join) - { - if constexpr (join_features.right && join_features.need_flags) - used_flags.template setUsed(find_result); - } - else /// ANY LEFT, SEMI LEFT, old ANY (RightAny) - { - setUsed(added_columns.filter, i); - used_flags.template setUsed(find_result); - added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); - - if (join_features.is_any_or_semi_join) - { - break; - } - } - } - } - - if (!right_row_found) - { - if constexpr (join_features.is_anti_join && join_features.left) - setUsed(added_columns.filter, i); - addNotFoundRow(added_columns, current_offset); - } - - if constexpr (join_features.need_replication) - { - (*added_columns.offsets_to_replicate)[i] = current_offset; - } - } - - added_columns.applyLazyDefaults(); - return i; -} - -template -size_t joinRightColumnsSwitchMultipleDisjuncts( - std::vector && key_getter_vector, - const std::vector & mapv, - AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]]) -{ - constexpr JoinFeatures join_features; - if constexpr (join_features.is_all_join) - { - if (added_columns.additional_filter_expression) - { - bool mark_per_row_used = join_features.right || join_features.full || mapv.size() > 1; - return joinRightColumnsWithAddtitionalFilter( - std::forward>(key_getter_vector), - mapv, - added_columns, - used_flags, - 
need_filter, - join_features.need_flags, - join_features.add_missing, - mark_per_row_used); - } - } - - if (added_columns.additional_filter_expression) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Additional filter expression is not supported for this JOIN"); - - return mapv.size() > 1 - ? joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags) - : joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags); -} - -template -size_t joinRightColumnsSwitchNullability( - std::vector && key_getter_vector, - const std::vector & mapv, - AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags) -{ - if (added_columns.need_filter) - { - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); - } - else - { - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); - } -} - -template -size_t switchJoinRightColumns( - const std::vector & mapv, - AddedColumns & added_columns, - HashJoin::Type type, - JoinStuff::JoinUsedFlags & used_flags) -{ - constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; - switch (type) - { - case HashJoin::Type::EMPTY: - { - if constexpr (!is_asof_join) - { - using KeyGetter = KeyGetterEmpty; - std::vector key_getter_vector; - key_getter_vector.emplace_back(); - - using MapTypeVal = typename KeyGetter::MappedType; - std::vector a_map_type_vector; - a_map_type_vector.emplace_back(); - return joinRightColumnsSwitchNullability( - std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); - } - throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys. 
Type: {}", type); - } - #define M(TYPE) \ - case HashJoin::Type::TYPE: \ - { \ - using MapTypeVal = const typename std::remove_reference_t::element_type; \ - using KeyGetter = typename KeyGetterForType::Type; \ - std::vector a_map_type_vector(mapv.size()); \ - std::vector key_getter_vector; \ - for (size_t d = 0; d < added_columns.join_on_keys.size(); ++d) \ - { \ - const auto & join_on_key = added_columns.join_on_keys[d]; \ - a_map_type_vector[d] = mapv[d]->TYPE.get(); \ - key_getter_vector.push_back(std::move(createKeyGetter(join_on_key.key_columns, join_on_key.key_sizes))); \ - } \ - return joinRightColumnsSwitchNullability( \ - std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); \ - } - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - - default: - throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", type); - } -} - -/** Since we do not store right key columns, - * this function is used to copy left key columns to right key columns. - * If the user requests some right columns, we just copy left key columns to right, since they are equal. - * Example: SELECT t1.key, t2.key FROM t1 FULL JOIN t2 ON t1.key = t2.key; - * In that case for matched rows in t2.key we will use values from t1.key. - * However, in some cases we might need to adjust the type of column, e.g. t1.key :: LowCardinality(String) and t2.key :: String - * Also, the nullability of the column might be different. - * Returns the right column after with necessary adjustments. 
- */ -ColumnWithTypeAndName copyLeftKeyColumnToRight( - const DataTypePtr & right_key_type, const String & renamed_right_column, const ColumnWithTypeAndName & left_column, const IColumn::Filter * null_map_filter = nullptr) -{ - ColumnWithTypeAndName right_column = left_column; - right_column.name = renamed_right_column; - - if (null_map_filter) - right_column.column = JoinCommon::filterWithBlanks(right_column.column, *null_map_filter); - - bool should_be_nullable = isNullableOrLowCardinalityNullable(right_key_type); - if (null_map_filter) - correctNullabilityInplace(right_column, should_be_nullable, *null_map_filter); - else - correctNullabilityInplace(right_column, should_be_nullable); - - if (!right_column.type->equals(*right_key_type)) - { - right_column.column = castColumnAccurate(right_column, right_key_type); - right_column.type = right_key_type; - } - - right_column.column = right_column.column->convertToFullColumnIfConst(); - return right_column; -} - -/// Cut first num_rows rows from block in place and returns block with remaining rows -Block sliceBlock(Block & block, size_t num_rows) -{ - size_t total_rows = block.rows(); - if (num_rows >= total_rows) - return {}; - size_t remaining_rows = total_rows - num_rows; - Block remaining_block = block.cloneEmpty(); - for (size_t i = 0; i < block.columns(); ++i) - { - auto & col = block.getByPosition(i); - remaining_block.getByPosition(i).column = col.column->cut(num_rows, remaining_rows); - col.column = col.column->cut(0, num_rows); - } - return remaining_block; -} - -} /// nameless - -template -Block HashJoin::joinBlockImpl( - Block & block, - const Block & block_with_columns_to_add, - const std::vector & maps_, - bool is_join_get) const -{ - constexpr JoinFeatures join_features; - - std::vector join_on_keys; - const auto & onexprs = table_join->getClauses(); - for (size_t i = 0; i < onexprs.size(); ++i) - { - const auto & key_names = !is_join_get ? 
onexprs[i].key_names_left : onexprs[i].key_names_right; - join_on_keys.emplace_back(block, key_names, onexprs[i].condColumnNames().first, key_sizes[i]); - } - size_t existing_columns = block.columns(); - - /** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized. - * Because if they are constants, then in the "not joined" rows, they may have different values - * - default values, which can differ from the values of these constants. - */ - if constexpr (join_features.right || join_features.full) - { - materializeBlockInplace(block); - } - - /** For LEFT/INNER JOIN, the saved blocks do not contain keys. - * For FULL/RIGHT JOIN, the saved blocks contain keys; - * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. - * For ASOF, the last column is used as the ASOF column - */ - AddedColumns added_columns( - block, - block_with_columns_to_add, - savedBlockSample(), - *this, - std::move(join_on_keys), - table_join->getMixedJoinExpression(), - join_features.is_asof_join, - is_join_get); - - bool has_required_right_keys = (required_right_keys.columns() != 0); - added_columns.need_filter = join_features.need_filter || has_required_right_keys; - added_columns.max_joined_block_rows = max_joined_block_rows; - if (!added_columns.max_joined_block_rows) - added_columns.max_joined_block_rows = std::numeric_limits::max(); - else - added_columns.reserve(join_features.need_replication); - - size_t num_joined = switchJoinRightColumns(maps_, added_columns, data->type, used_flags); - /// Do not hold memory for join_on_keys anymore - added_columns.join_on_keys.clear(); - Block remaining_block = sliceBlock(block, num_joined); - - added_columns.buildOutput(); - for (size_t i = 0; i < added_columns.size(); ++i) - block.insert(added_columns.moveColumn(i)); - - std::vector right_keys_to_replicate [[maybe_unused]]; - - if constexpr (join_features.need_filter) - { - /// If ANY INNER | RIGHT JOIN 
- filter all the columns except the new ones. - for (size_t i = 0; i < existing_columns; ++i) - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(added_columns.filter, -1); - - /// Add join key columns from right block if needed using value from left table because of equality - for (size_t i = 0; i < required_right_keys.columns(); ++i) - { - const auto & right_key = required_right_keys.getByPosition(i); - /// asof column is already in block. - if (join_features.is_asof_join && right_key.name == table_join->getOnlyClause().key_names_right.back()) - continue; - - const auto & left_column = block.getByName(required_right_keys_sources[i]); - const auto & right_col_name = getTableJoin().renamedRightColumnName(right_key.name); - auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column); - block.insert(std::move(right_col)); - } - } - else if (has_required_right_keys) - { - /// Add join key columns from right block if needed. - for (size_t i = 0; i < required_right_keys.columns(); ++i) - { - const auto & right_key = required_right_keys.getByPosition(i); - auto right_col_name = getTableJoin().renamedRightColumnName(right_key.name); - /// asof column is already in block. - if (join_features.is_asof_join && right_key.name == table_join->getOnlyClause().key_names_right.back()) - continue; - - const auto & left_column = block.getByName(required_right_keys_sources[i]); - auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &added_columns.filter); - block.insert(std::move(right_col)); - - if constexpr (join_features.need_replication) - right_keys_to_replicate.push_back(block.getPositionByName(right_col_name)); - } - } - - if constexpr (join_features.need_replication) - { - std::unique_ptr & offsets_to_replicate = added_columns.offsets_to_replicate; - - /// If ALL ... JOIN - we replicate all the columns except the new ones. 
- for (size_t i = 0; i < existing_columns; ++i) - { - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); - } - - /// Replicate additional right keys - for (size_t pos : right_keys_to_replicate) - { - block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate); - } - } - - return remaining_block; -} - -void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const -{ - size_t start_left_row = 0; - size_t start_right_block = 0; - std::unique_ptr reader = nullptr; - if (not_processed) - { - auto & continuation = static_cast(*not_processed); - start_left_row = continuation.left_position; - start_right_block = continuation.right_block; - reader = std::move(continuation.reader); - not_processed.reset(); - } - - size_t num_existing_columns = block.columns(); - size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); - - ColumnRawPtrs src_left_columns; - MutableColumns dst_columns; - - { - src_left_columns.reserve(num_existing_columns); - dst_columns.reserve(num_existing_columns + num_columns_to_add); - - for (const ColumnWithTypeAndName & left_column : block) - { - src_left_columns.push_back(left_column.column.get()); - dst_columns.emplace_back(src_left_columns.back()->cloneEmpty()); - } - - for (const ColumnWithTypeAndName & right_column : sample_block_with_columns_to_add) - dst_columns.emplace_back(right_column.column->cloneEmpty()); - - for (auto & dst : dst_columns) - dst->reserve(max_joined_block_rows); - } - - size_t rows_left = block.rows(); - size_t rows_added = 0; - for (size_t left_row = start_left_row; left_row < rows_left; ++left_row) - { - size_t block_number = 0; - - auto process_right_block = [&](const Block & block_right) - { - size_t rows_right = block_right.rows(); - rows_added += rows_right; - - for (size_t col_num = 0; col_num < num_existing_columns; ++col_num) - 
dst_columns[col_num]->insertManyFrom(*src_left_columns[col_num], left_row, rows_right); - - for (size_t col_num = 0; col_num < num_columns_to_add; ++col_num) - { - const IColumn & column_right = *block_right.getByPosition(col_num).column; - dst_columns[num_existing_columns + col_num]->insertRangeFrom(column_right, 0, rows_right); - } - }; - - for (const Block & compressed_block_right : data->blocks) - { - ++block_number; - if (block_number < start_right_block) - continue; - - auto block_right = compressed_block_right.decompress(); - process_right_block(block_right); - if (rows_added > max_joined_block_rows) - { - break; - } - } - - if (tmp_stream && rows_added <= max_joined_block_rows) - { - if (reader == nullptr) - { - tmp_stream->finishWritingAsyncSafe(); - reader = tmp_stream->getReadStream(); - } - while (auto block_right = reader->read()) - { - ++block_number; - process_right_block(block_right); - if (rows_added > max_joined_block_rows) - { - break; - } - } - - /// It means, that reader->read() returned {} - if (rows_added <= max_joined_block_rows) - { - reader.reset(); - } - } - - start_right_block = 0; - - if (rows_added > max_joined_block_rows) - { - not_processed = std::make_shared( - NotProcessedCrossJoin{{block.cloneEmpty()}, left_row, block_number + 1, std::move(reader)}); - not_processed->block.swap(block); - break; - } - } - - for (const ColumnWithTypeAndName & src_column : sample_block_with_columns_to_add) - block.insert(src_column); - - block = block.cloneWithColumns(std::move(dst_columns)); -} - -DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const -{ - size_t num_keys = data_types.size(); - if (right_table_keys.columns() != num_keys) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function joinGet{} doesn't match: passed, should be equal to {}", - toString(or_null ? 
"OrNull" : ""), toString(num_keys)); - - for (size_t i = 0; i < num_keys; ++i) - { - const auto & left_type_origin = data_types[i]; - const auto & [c2, right_type_origin, right_name] = right_table_keys.safeGetByPosition(i); - auto left_type = removeNullable(recursiveRemoveLowCardinality(left_type_origin)); - auto right_type = removeNullable(recursiveRemoveLowCardinality(right_type_origin)); - if (!left_type->equals(*right_type)) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch in joinGet key {}: " - "found type {}, while the needed type is {}", i, left_type->getName(), right_type->getName()); - } - - if (!sample_block_with_columns_to_add.has(column_name)) - throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "StorageJoin doesn't contain column {}", column_name); - - auto elem = sample_block_with_columns_to_add.getByName(column_name); - if (or_null && JoinCommon::canBecomeNullable(elem.type)) - elem.type = makeNullable(elem.type); - return elem.type; -} - -/// TODO: return multiple columns as named tuple -/// TODO: return array of values when strictness == JoinStrictness::All -ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block_with_columns_to_add) const -{ - bool is_valid = (strictness == JoinStrictness::Any || strictness == JoinStrictness::RightAny) - && kind == JoinKind::Left; - if (!is_valid) - throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "joinGet only supports StorageJoin of type Left Any"); - const auto & key_names_right = table_join->getOnlyClause().key_names_right; - - /// Assemble the key block with correct names. 
- Block keys; - for (size_t i = 0; i < block.columns(); ++i) - { - auto key = block.getByPosition(i); - key.name = key_names_right[i]; - keys.insert(std::move(key)); - } - - static_assert(!MapGetter::flagged, - "joinGet are not protected from hash table changes between block processing"); - - std::vector maps_vector; - maps_vector.push_back(&std::get(data->maps[0])); - joinBlockImpl( - keys, block_with_columns_to_add, maps_vector, /* is_join_get = */ true); - return keys.getByPosition(keys.columns() - 1); -} - -void HashJoin::checkTypesOfKeys(const Block & block) const -{ - for (const auto & onexpr : table_join->getClauses()) - { - JoinCommon::checkTypesOfKeys(block, onexpr.key_names_left, right_table_keys, onexpr.key_names_right); - } -} - -void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) -{ - if (!data) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot join after data has been released"); - - for (const auto & onexpr : table_join->getClauses()) - { - auto cond_column_name = onexpr.condColumnNames(); - JoinCommon::checkTypesOfKeys( - block, onexpr.key_names_left, cond_column_name.first, - right_sample_block, onexpr.key_names_right, cond_column_name.second); - } - - if (kind == JoinKind::Cross) - { - joinBlockImplCross(block, not_processed); - return; - } - - if (kind == JoinKind::Right || kind == JoinKind::Full) - { - materializeBlockInplace(block); - } - - { - std::vectormaps[0])> * > maps_vector; - for (size_t i = 0; i < table_join->getClauses().size(); ++i) - maps_vector.push_back(&data->maps[i]); - - if (joinDispatch(kind, strictness, maps_vector, [&](auto kind_, auto strictness_, auto & maps_vector_) - { - Block remaining_block = joinBlockImpl(block, sample_block_with_columns_to_add, maps_vector_); - if (remaining_block.rows()) - not_processed = std::make_shared(ExtraBlock{std::move(remaining_block)}); - else - not_processed.reset(); - })) - { - /// Joined - } - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong JOIN 
combination: {} {}", strictness, kind); - } -} - -HashJoin::~HashJoin() -{ - if (!data) - { - LOG_TEST(log, "{}Join data has been already released", instance_log_id); - return; - } - LOG_TEST( - log, - "{}Join data is being destroyed, {} bytes and {} rows in hash table", - instance_log_id, - getTotalByteCount(), - getTotalRowCount()); -} - -template -struct AdderNonJoined -{ - static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right) - { - constexpr bool mapped_asof = std::is_same_v; - [[maybe_unused]] constexpr bool mapped_one = std::is_same_v; - - if constexpr (mapped_asof) - { - /// Do nothing - } - else if constexpr (mapped_one) - { - for (size_t j = 0; j < columns_right.size(); ++j) - { - const auto & mapped_column = mapped.block->getByPosition(j).column; - columns_right[j]->insertFrom(*mapped_column, mapped.row_num); - } - - ++rows_added; - } - else - { - for (auto it = mapped.begin(); it.ok(); ++it) - { - for (size_t j = 0; j < columns_right.size(); ++j) - { - const auto & mapped_column = it->block->getByPosition(j).column; - columns_right[j]->insertFrom(*mapped_column, it->row_num); - } - - ++rows_added; - } - } - } -}; - -/// Stream from not joined earlier rows of the right table. 
-/// Based on: -/// - map offsetInternal saved in used_flags for single disjuncts -/// - flags in BlockWithFlags for multiple disjuncts -class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller -{ -public: - NotJoinedHash(const HashJoin & parent_, UInt64 max_block_size_, bool flag_per_row_) - : parent(parent_) - , max_block_size(max_block_size_) - , flag_per_row(flag_per_row_) - , current_block_start(0) - { - if (parent.data == nullptr) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot join after data has been released"); - } - - Block getEmptyBlock() override { return parent.savedBlockSample().cloneEmpty(); } - - size_t fillColumns(MutableColumns & columns_right) override - { - size_t rows_added = 0; - if (unlikely(parent.data->type == HashJoin::Type::EMPTY)) - { - rows_added = fillColumnsFromData(parent.data->blocks, columns_right); - } - else - { - auto fill_callback = [&](auto, auto, auto & map) - { - rows_added = fillColumnsFromMap(map, columns_right); - }; - - if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), fill_callback)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness '{}' (must be on of: ANY, ALL, ASOF)", parent.strictness); - } - - if (!flag_per_row) - { - fillNullsFromBlocks(columns_right, rows_added); - } - - return rows_added; - } - -private: - const HashJoin & parent; - UInt64 max_block_size; - bool flag_per_row; - - size_t current_block_start; - - std::any position; - std::optional nulls_position; - std::optional used_position; - - size_t fillColumnsFromData(const BlocksList & blocks, MutableColumns & columns_right) - { - if (!position.has_value()) - position = std::make_any(blocks.begin()); - - auto & block_it = std::any_cast(position); - auto end = blocks.end(); - - size_t rows_added = 0; - for (; block_it != end; ++block_it) - { - size_t rows_from_block = std::min(max_block_size - rows_added, block_it->rows() - current_block_start); - for (size_t j = 0; j < columns_right.size(); 
++j) - { - const auto & col = block_it->getByPosition(j).column; - columns_right[j]->insertRangeFrom(*col, current_block_start, rows_from_block); - } - rows_added += rows_from_block; - - if (rows_added >= max_block_size) - { - /// How many rows have been read - current_block_start += rows_from_block; - if (block_it->rows() <= current_block_start) - { - /// current block was fully read - ++block_it; - current_block_start = 0; - } - break; - } - current_block_start = 0; - } - return rows_added; - } - - template - size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right) - { - switch (parent.data->type) - { - #define M(TYPE) \ - case HashJoin::Type::TYPE: \ - return fillColumns(*maps.TYPE, columns_keys_and_right); - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - default: - throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type); - } - } - - template - size_t fillColumns(const Map & map, MutableColumns & columns_keys_and_right) - { - size_t rows_added = 0; - - if (flag_per_row) - { - if (!used_position.has_value()) - used_position = parent.data->blocks.begin(); - - auto end = parent.data->blocks.end(); - - for (auto & it = *used_position; it != end && rows_added < max_block_size; ++it) - { - const Block & mapped_block = *it; - - for (size_t row = 0; row < mapped_block.rows(); ++row) - { - if (!parent.isUsed(&mapped_block, row)) - { - for (size_t colnum = 0; colnum < columns_keys_and_right.size(); ++colnum) - { - columns_keys_and_right[colnum]->insertFrom(*mapped_block.getByPosition(colnum).column, row); - } - - ++rows_added; - } - } - } - } - else - { - using Mapped = typename Map::mapped_type; - using Iterator = typename Map::const_iterator; - - - if (!position.has_value()) - position = std::make_any(map.begin()); - - Iterator & it = std::any_cast(position); - auto end = map.end(); - - for (; it != end; ++it) - { - const Mapped & mapped = it->getMapped(); - - size_t offset = 
map.offsetInternal(it.getPtr()); - if (parent.isUsed(offset)) - continue; - AdderNonJoined::add(mapped, rows_added, columns_keys_and_right); - - if (rows_added >= max_block_size) - { - ++it; - break; - } - } - } - - return rows_added; - } - - void fillNullsFromBlocks(MutableColumns & columns_keys_and_right, size_t & rows_added) - { - if (!nulls_position.has_value()) - nulls_position = parent.data->blocks_nullmaps.begin(); - - auto end = parent.data->blocks_nullmaps.end(); - - for (auto & it = *nulls_position; it != end && rows_added < max_block_size; ++it) - { - const auto * block = it->first; - ConstNullMapPtr nullmap = nullptr; - if (it->second) - nullmap = &assert_cast(*it->second).getData(); - - for (size_t row = 0; row < block->rows(); ++row) - { - if (nullmap && (*nullmap)[row]) - { - for (size_t col = 0; col < columns_keys_and_right.size(); ++col) - columns_keys_and_right[col]->insertFrom(*block->getByPosition(col).column, row); - ++rows_added; - } - } - } - } -}; - -IBlocksStreamPtr HashJoin::getNonJoinedBlocks(const Block & left_sample_block, - const Block & result_sample_block, - UInt64 max_block_size) const -{ - if (!JoinCommon::hasNonJoinedBlocks(*table_join)) - return {}; - size_t left_columns_count = left_sample_block.columns(); - - bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); - if (!flag_per_row) - { - /// With multiple disjuncts, all keys are in sample_block_with_columns_to_add, so invariant is not held - size_t expected_columns_count = left_columns_count + required_right_keys.columns() + sample_block_with_columns_to_add.columns(); - if (expected_columns_count != result_sample_block.columns()) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected number of columns in result sample block: {} instead of {} ({} + {} + {})", - result_sample_block.columns(), expected_columns_count, - left_columns_count, required_right_keys.columns(), sample_block_with_columns_to_add.columns()); - } - } - - auto non_joined = 
std::make_unique(*this, max_block_size, flag_per_row); - return std::make_unique(std::move(non_joined), result_sample_block, left_columns_count, *table_join); -} - -void HashJoin::reuseJoinedData(const HashJoin & join) -{ - data = join.data; - from_storage_join = true; - - bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); - if (flag_per_row) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StorageJoin with ORs is not supported"); - - for (auto & map : data->maps) - { - joinDispatch(kind, strictness, map, [this](auto kind_, auto strictness_, auto & map_) - { - used_flags.reinit(map_.getBufferSizeInCells(data->type) + 1); - }); - } -} - -BlocksList HashJoin::releaseJoinedBlocks(bool restructure) -{ - LOG_TRACE(log, "{}Join data is being released, {} bytes and {} rows in hash table", instance_log_id, getTotalByteCount(), getTotalRowCount()); - - BlocksList right_blocks = std::move(data->blocks); - if (!restructure) - { - data.reset(); - return right_blocks; - } - - data->maps.clear(); - data->blocks_nullmaps.clear(); - - BlocksList restored_blocks; - - /// names to positions optimization - std::vector positions; - std::vector is_nullable; - if (!right_blocks.empty()) - { - positions.reserve(right_sample_block.columns()); - const Block & tmp_block = *right_blocks.begin(); - for (const auto & sample_column : right_sample_block) - { - positions.emplace_back(tmp_block.getPositionByName(sample_column.name)); - is_nullable.emplace_back(isNullableOrLowCardinalityNullable(sample_column.type)); - } - } - - for (Block & saved_block : right_blocks) - { - Block restored_block; - for (size_t i = 0; i < positions.size(); ++i) - { - auto & column = saved_block.getByPosition(positions[i]); - correctNullabilityInplace(column, is_nullable[i]); - restored_block.insert(column); - } - restored_blocks.emplace_back(std::move(restored_block)); - } - - data.reset(); - return restored_blocks; -} - -const ColumnWithTypeAndName & HashJoin::rightAsofKeyColumn() const -{ - /// It 
should be nullable when right side is nullable - return savedBlockSample().getByName(table_join->getOnlyClause().key_names_right.back()); -} - -void HashJoin::validateAdditionalFilterExpression(ExpressionActionsPtr additional_filter_expression) -{ - if (!additional_filter_expression) - return; - - Block expression_sample_block = additional_filter_expression->getSampleBlock(); - - if (expression_sample_block.columns() != 1) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Unexpected expression in JOIN ON section. Expected single column, got '{}'", - expression_sample_block.dumpStructure()); - } - - auto type = removeNullable(expression_sample_block.getByPosition(0).type); - if (!type->equals(*std::make_shared())) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Unexpected expression in JOIN ON section. Expected boolean (UInt8), got '{}'. expression:\n{}", - expression_sample_block.getByPosition(0).type->getName(), - additional_filter_expression->dumpActions()); - } - - bool is_supported = (strictness == JoinStrictness::All) && (isInnerOrLeft(kind) || isRightOrFull(kind)); - if (!is_supported) - { - throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "Non equi condition '{}' from JOIN ON section is supported only for ALL INNER/LEFT/FULL/RIGHT JOINs", - expression_sample_block.getByPosition(0).name); - } -} - -bool HashJoin::needUsedFlagsForPerRightTableRow(std::shared_ptr table_join_) const -{ - if (!table_join_->oneDisjunct()) - return true; - /// If it'a a all right join with inequal conditions, we need to mark each row - if (table_join_->getMixedJoinExpression() && isRightOrFull(table_join_->kind())) - return true; - return false; -} - -} diff --git a/src/Interpreters/HashJoin/AddedColumns.cpp b/src/Interpreters/HashJoin/AddedColumns.cpp new file mode 100644 index 00000000000..930a352744d --- /dev/null +++ b/src/Interpreters/HashJoin/AddedColumns.cpp @@ -0,0 +1,138 @@ +#include +#include + +namespace DB +{ +JoinOnKeyColumns::JoinOnKeyColumns(const 
Block & block, const Names & key_names_, const String & cond_column_name, const Sizes & key_sizes_) + : key_names(key_names_) + , materialized_keys_holder(JoinCommon::materializeColumns( + block, key_names)) /// Rare case, when keys are constant or low cardinality. To avoid code bloat, simply materialize them. + , key_columns(JoinCommon::getRawPointers(materialized_keys_holder)) + , null_map(nullptr) + , null_map_holder(extractNestedColumnsAndNullMap(key_columns, null_map)) + , join_mask_column(JoinCommon::getColumnAsMask(block, cond_column_name)) + , key_sizes(key_sizes_) +{ +} + +template<> void AddedColumns::buildOutput() +{ +} + +template<> +void AddedColumns::buildOutput() +{ + for (size_t i = 0; i < this->size(); ++i) + { + auto& col = columns[i]; + size_t default_count = 0; + auto apply_default = [&]() + { + if (default_count > 0) + { + JoinCommon::addDefaultValues(*col, type_name[i].type, default_count); + default_count = 0; + } + }; + + for (size_t j = 0; j < lazy_output.blocks.size(); ++j) + { + if (!lazy_output.blocks[j]) + { + default_count++; + continue; + } + apply_default(); + const auto & column_from_block = reinterpret_cast(lazy_output.blocks[j])->getByPosition(right_indexes[i]); + /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin. 
+ if (is_join_get) + { + if (auto * nullable_col = typeid_cast(col.get()); + nullable_col && !column_from_block.column->isNullable()) + { + nullable_col->insertFromNotNullable(*column_from_block.column, lazy_output.row_nums[j]); + continue; + } + } + col->insertFrom(*column_from_block.column, lazy_output.row_nums[j]); + } + apply_default(); + } +} + +template<> +void AddedColumns::applyLazyDefaults() +{ + if (lazy_defaults_count) + { + for (size_t j = 0, size = right_indexes.size(); j < size; ++j) + JoinCommon::addDefaultValues(*columns[j], type_name[j].type, lazy_defaults_count); + lazy_defaults_count = 0; + } +} + +template<> +void AddedColumns::applyLazyDefaults() +{ +} + +template <> +void AddedColumns::appendFromBlock(const Block & block, size_t row_num,const bool has_defaults) +{ + if (has_defaults) + applyLazyDefaults(); + +#ifndef NDEBUG + checkBlock(block); +#endif + if (is_join_get) + { + size_t right_indexes_size = right_indexes.size(); + for (size_t j = 0; j < right_indexes_size; ++j) + { + const auto & column_from_block = block.getByPosition(right_indexes[j]); + if (auto * nullable_col = nullable_column_ptrs[j]) + nullable_col->insertFromNotNullable(*column_from_block.column, row_num); + else + columns[j]->insertFrom(*column_from_block.column, row_num); + } + } + else + { + size_t right_indexes_size = right_indexes.size(); + for (size_t j = 0; j < right_indexes_size; ++j) + { + const auto & column_from_block = block.getByPosition(right_indexes[j]); + columns[j]->insertFrom(*column_from_block.column, row_num); + } + } +} + +template <> +void AddedColumns::appendFromBlock(const Block & block, size_t row_num, bool) +{ +#ifndef NDEBUG + checkBlock(block); +#endif + if (has_columns_to_add) + { + lazy_output.blocks.emplace_back(reinterpret_cast(&block)); + lazy_output.row_nums.emplace_back(static_cast(row_num)); + } +} +template<> +void AddedColumns::appendDefaultRow() +{ + ++lazy_defaults_count; +} + +template<> +void AddedColumns::appendDefaultRow() +{ + 
if (has_columns_to_add) + { + lazy_output.blocks.emplace_back(0); + lazy_output.row_nums.emplace_back(0); + } +} +} diff --git a/src/Interpreters/HashJoin/AddedColumns.h b/src/Interpreters/HashJoin/AddedColumns.h new file mode 100644 index 00000000000..13a7df6f498 --- /dev/null +++ b/src/Interpreters/HashJoin/AddedColumns.h @@ -0,0 +1,226 @@ +#pragma once +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +class ExpressionActions; +using ExpressionActionsPtr = std::shared_ptr; + +struct JoinOnKeyColumns +{ + Names key_names; + + Columns materialized_keys_holder; + ColumnRawPtrs key_columns; + + ConstNullMapPtr null_map; + ColumnPtr null_map_holder; + + /// Only rows where mask == true can be joined + JoinCommon::JoinMask join_mask_column; + + Sizes key_sizes; + + explicit JoinOnKeyColumns(const Block & block, const Names & key_names_, const String & cond_column_name, const Sizes & key_sizes_); + + bool isRowFiltered(size_t i) const { return join_mask_column.isRowFiltered(i); } +}; + +template +class AddedColumns +{ +public: + struct TypeAndName + { + DataTypePtr type; + String name; + String qualified_name; + + TypeAndName(DataTypePtr type_, const String & name_, const String & qualified_name_) + : type(type_), name(name_), qualified_name(qualified_name_) + { + } + }; + + struct LazyOutput + { + PaddedPODArray blocks; + PaddedPODArray row_nums; + }; + + AddedColumns( + const Block & left_block_, + const Block & block_with_columns_to_add, + const Block & saved_block_sample, + const HashJoin & join, + std::vector && join_on_keys_, + ExpressionActionsPtr additional_filter_expression_, + bool is_asof_join, + bool is_join_get_) + : left_block(left_block_) + , join_on_keys(join_on_keys_) + , additional_filter_expression(additional_filter_expression_) + , rows_to_add(left_block.rows()) + , is_join_get(is_join_get_) + { + size_t num_columns_to_add = block_with_columns_to_add.columns(); + if (is_asof_join) + 
++num_columns_to_add; + + if constexpr (lazy) + { + has_columns_to_add = num_columns_to_add > 0; + lazy_output.blocks.reserve(rows_to_add); + lazy_output.row_nums.reserve(rows_to_add); + } + + columns.reserve(num_columns_to_add); + type_name.reserve(num_columns_to_add); + right_indexes.reserve(num_columns_to_add); + + for (const auto & src_column : block_with_columns_to_add) + { + /// Column names `src_column.name` and `qualified_name` can differ for StorageJoin, + /// because it uses not qualified right block column names + auto qualified_name = join.getTableJoin().renamedRightColumnName(src_column.name); + /// Don't insert column if it's in left block + if (!left_block.has(qualified_name)) + addColumn(src_column, qualified_name); + } + + if (is_asof_join) + { + assert(join_on_keys.size() == 1); + const ColumnWithTypeAndName & right_asof_column = join.rightAsofKeyColumn(); + addColumn(right_asof_column, right_asof_column.name); + left_asof_key = join_on_keys[0].key_columns.back(); + } + + for (auto & tn : type_name) + right_indexes.push_back(saved_block_sample.getPositionByName(tn.name)); + + nullable_column_ptrs.resize(right_indexes.size(), nullptr); + for (size_t j = 0; j < right_indexes.size(); ++j) + { + /** If it's joinGetOrNull, we will have nullable columns in result block + * even if right column is not nullable in storage (saved_block_sample). 
+ */ + const auto & saved_column = saved_block_sample.getByPosition(right_indexes[j]).column; + if (columns[j]->isNullable() && !saved_column->isNullable()) + nullable_column_ptrs[j] = typeid_cast(columns[j].get()); + } + } + + size_t size() const { return columns.size(); } + + void buildOutput(); + + ColumnWithTypeAndName moveColumn(size_t i) + { + return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].type, type_name[i].qualified_name); + } + + void appendFromBlock(const Block & block, size_t row_num, bool has_default); + + void appendDefaultRow(); + + void applyLazyDefaults(); + + const IColumn & leftAsofKey() const { return *left_asof_key; } + + Block left_block; + std::vector join_on_keys; + ExpressionActionsPtr additional_filter_expression; + + size_t max_joined_block_rows = 0; + size_t rows_to_add; + std::unique_ptr offsets_to_replicate; + bool need_filter = false; + IColumn::Filter filter; + + void reserve(bool need_replicate) + { + if (!max_joined_block_rows) + return; + + /// Do not allow big allocations when user set max_joined_block_rows to huge value + size_t reserve_size = std::min(max_joined_block_rows, DEFAULT_BLOCK_SIZE * 2); + + if (need_replicate) + /// Reserve 10% more space for columns, because some rows can be repeated + reserve_size = static_cast(1.1 * reserve_size); + + for (auto & column : columns) + column->reserve(reserve_size); + } + +private: + + void checkBlock(const Block & block) + { + for (size_t j = 0; j < right_indexes.size(); ++j) + { + const auto * column_from_block = block.getByPosition(right_indexes[j]).column.get(); + const auto * dest_column = columns[j].get(); + if (auto * nullable_col = nullable_column_ptrs[j]) + { + if (!is_join_get) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Columns {} and {} can have different nullability only in joinGetOrNull", + dest_column->getName(), column_from_block->getName()); + dest_column = nullable_col->getNestedColumnPtr().get(); + } + /** Using 
dest_column->structureEquals(*column_from_block) will not work for low cardinality columns, + * because dictionaries can be different, while calling insertFrom on them is safe, for example: + * ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1))) + * and + * ColumnLowCardinality(size = 0, UInt16(size = 0), ColumnUnique(size = 1, String(size = 1))) + */ + if (typeid(*dest_column) != typeid(*column_from_block)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns {} and {} have different types {} and {}", + dest_column->getName(), column_from_block->getName(), + demangle(typeid(*dest_column).name()), demangle(typeid(*column_from_block).name())); + } + } + + MutableColumns columns; + bool is_join_get; + std::vector right_indexes; + std::vector type_name; + std::vector nullable_column_ptrs; + size_t lazy_defaults_count = 0; + + /// for lazy + // The default row is represented by an empty RowRef, so that fixed-size blocks can be generated sequentially, + // default_count cannot represent the position of the row + LazyOutput lazy_output; + bool has_columns_to_add; + + /// for ASOF + const IColumn * left_asof_key = nullptr; + + + void addColumn(const ColumnWithTypeAndName & src_column, const std::string & qualified_name) + { + columns.push_back(src_column.column->cloneEmpty()); + columns.back()->reserve(src_column.column->size()); + type_name.emplace_back(src_column.type, src_column.name, qualified_name); + } +}; + +/// Adapter class to pass into addFoundRowAll +/// In joinRightColumnsWithAdditionalFilter we don't want to add rows directly into AddedColumns, +/// because they need to be filtered by additional_filter_expression. 
+class PreSelectedRows : public std::vector +{ +public: + void appendFromBlock(const Block & block, size_t row_num, bool /* has_default */) { this->emplace_back(&block, row_num); } +}; + +} diff --git a/src/Interpreters/HashJoin/FullHashJoin.cpp b/src/Interpreters/HashJoin/FullHashJoin.cpp new file mode 100644 index 00000000000..5d058d10fc2 --- /dev/null +++ b/src/Interpreters/HashJoin/FullHashJoin.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +} diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp new file mode 100644 index 00000000000..fa8ebd2c0f0 --- /dev/null +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -0,0 +1,1333 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +#include +#include + +namespace CurrentMetrics +{ + extern const Metric TemporaryFilesForJoin; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int NO_SUCH_COLUMN_IN_TABLE; + extern const int INCOMPATIBLE_TYPE_OF_JOIN; + extern const int UNSUPPORTED_JOIN_KEYS; + extern const int LOGICAL_ERROR; + extern const int SYNTAX_ERROR; + extern const int SET_SIZE_LIMIT_EXCEEDED; + extern const int TYPE_MISMATCH; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int INVALID_JOIN_ON_EXPRESSION; +} + +namespace +{ + +struct NotProcessedCrossJoin : public ExtraBlock +{ + size_t left_position; + size_t right_block; + std::unique_ptr reader; +}; + + +Int64 getCurrentQueryMemoryUsage() +{ + /// Use query-level memory tracker + if (auto * memory_tracker_child = 
CurrentThread::getMemoryTracker()) + if (auto * memory_tracker = memory_tracker_child->getParent()) + return memory_tracker->get(); + return 0; +} + +} + +static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable) +{ + if (nullable) + { + JoinCommon::convertColumnToNullable(column); + } + else + { + /// We have to replace values masked by NULLs with defaults. + if (column.column) + if (const auto * nullable_column = checkAndGetColumn(&*column.column)) + column.column = JoinCommon::filterWithBlanks(column.column, nullable_column->getNullMapColumn().getData(), true); + + JoinCommon::removeColumnNullability(column); + } +} + +HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_sample_block_, + bool any_take_last_row_, size_t reserve_num_, const String & instance_id_) + : table_join(table_join_) + , kind(table_join->kind()) + , strictness(table_join->strictness()) + , any_take_last_row(any_take_last_row_) + , reserve_num(reserve_num_) + , instance_id(instance_id_) + , asof_inequality(table_join->getAsofInequality()) + , data(std::make_shared()) + , tmp_data( + table_join_->getTempDataOnDisk() + ? std::make_unique(table_join_->getTempDataOnDisk(), CurrentMetrics::TemporaryFilesForJoin) + : nullptr) + , right_sample_block(right_sample_block_) + , max_joined_block_rows(table_join->maxJoinedBlockRows()) + , instance_log_id(!instance_id_.empty() ? 
"(" + instance_id_ + ") " : "") + , log(getLogger("HashJoin")) +{ + LOG_TRACE(log, "{}Keys: {}, datatype: {}, kind: {}, strictness: {}, right header: {}", + instance_log_id, TableJoin::formatClauses(table_join->getClauses(), true), data->type, kind, strictness, right_sample_block.dumpStructure()); + + validateAdditionalFilterExpression(table_join->getMixedJoinExpression()); + + used_flags = std::make_unique(); + + if (isCrossOrComma(kind)) + { + data->type = Type::CROSS; + sample_block_with_columns_to_add = right_sample_block; + } + else if (table_join->getClauses().empty()) + { + data->type = Type::EMPTY; + /// We might need to insert default values into the right columns, materialize them + sample_block_with_columns_to_add = materializeBlock(right_sample_block); + } + else if (table_join->oneDisjunct()) + { + const auto & key_names_right = table_join->getOnlyClause().key_names_right; + JoinCommon::splitAdditionalColumns(key_names_right, right_sample_block, right_table_keys, sample_block_with_columns_to_add); + required_right_keys = table_join->getRequiredRightKeys(right_table_keys, required_right_keys_sources); + } + else + { + /// required right keys concept does not work well if multiple disjuncts, we need all keys + sample_block_with_columns_to_add = right_table_keys = materializeBlock(right_sample_block); + } + + materializeBlockInplace(right_table_keys); + initRightBlockStructure(data->sample_block); + data->sample_block = prepareRightBlock(data->sample_block); + + JoinCommon::createMissedColumns(sample_block_with_columns_to_add); + + size_t disjuncts_num = table_join->getClauses().size(); + data->maps.resize(disjuncts_num); + key_sizes.reserve(disjuncts_num); + + for (const auto & clause : table_join->getClauses()) + { + const auto & key_names_right = clause.key_names_right; + ColumnRawPtrs key_columns = JoinCommon::extractKeysForJoin(right_table_keys, key_names_right); + + if (strictness == JoinStrictness::Asof) + { + assert(disjuncts_num == 1); + + /// 
@note ASOF JOIN is not INNER. It's better to avoid using the 'INNER ASOF' combination in messages.
+            /// In fact INNER means 'LEFT SEMI ASOF' while LEFT means 'LEFT OUTER ASOF'.
+            if (!isLeft(kind) && !isInner(kind))
+                throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Wrong ASOF JOIN type. Only ASOF and LEFT ASOF joins are supported");
+
+            /// The last key column is the ASOF column (it is taken from key_columns.back() below),
+            /// so at least one more column must remain for the equality part of the join.
+            if (key_columns.size() <= 1)
+                throw Exception(ErrorCodes::SYNTAX_ERROR, "ASOF join needs at least one equi-join column");
+
+            size_t asof_size;
+            asof_type = SortedLookupVectorBase::getTypeSize(*key_columns.back(), asof_size);
+            key_columns.pop_back();
+
+            /// This sets up the appropriate hash table for the direct lookup part of the join.
+            /// The choice does not depend on the size of the ASOF join key (as that goes into the BST),
+            /// which is why the ASOF column was popped above. Its size is appended back afterwards
+            /// so that it can be extracted appropriately from the full stored key_columns and key_sizes.
+            auto & asof_key_sizes = key_sizes.emplace_back();
+            data->type = chooseMethod(kind, key_columns, asof_key_sizes);
+            asof_key_sizes.push_back(asof_size);
+        }
+        else
+        {
+            /// Choose data structure to use for JOIN.
+            auto current_join_method = chooseMethod(kind, key_columns, key_sizes.emplace_back());
+            /// The first clause decides the method; if a later clause picks a different one,
+            /// fall back to the generic `hashed` method for all of them.
+            if (data->type == Type::EMPTY)
+                data->type = current_join_method;
+            else if (data->type != current_join_method)
+                data->type = Type::hashed;
+        }
+    }
+
+    for (auto & maps : data->maps)
+        dataMapInit(maps);
+}
+
+/// Chooses the hash table variant to use for the given key columns.
+/// `key_sizes` is resized to the number of keys and filled with the fixed value size of each key,
+/// up to (not including) the first key that is not fixed-size and contiguous.
+/// With no keys at all the result is CROSS for cross/comma joins and EMPTY otherwise.
+HashJoin::Type HashJoin::chooseMethod(JoinKind kind, const ColumnRawPtrs & key_columns, Sizes & key_sizes)
+{
+    size_t keys_size = key_columns.size();
+
+    if (keys_size == 0)
+    {
+        if (isCrossOrComma(kind))
+            return Type::CROSS;
+        return Type::EMPTY;
+    }
+
+    bool all_fixed = true;
+    size_t keys_bytes = 0;
+    key_sizes.resize(keys_size);
+    for (size_t j = 0; j < keys_size; ++j)
+    {
+        /// Stop at the first non-fixed key: the remaining entries of key_sizes keep the values set by resize().
+        if (!key_columns[j]->isFixedAndContiguous())
+        {
+            all_fixed = false;
+            break;
+        }
+        key_sizes[j] = key_columns[j]->sizeOfValueIfFixed();
+        keys_bytes += key_sizes[j];
+    }
+
+    /// If there is exactly one numeric key, dispatch on its size in bytes (1, 2, 4, 8, 16 or 32).
+    if (keys_size == 1 && key_columns[0]->isNumeric())
+    {
+        size_t size_of_field = key_columns[0]->sizeOfValueIfFixed();
+        if (size_of_field == 1)
+            return Type::key8;
+        if (size_of_field == 2)
+            return Type::key16;
+        if (size_of_field == 4)
+            return Type::key32;
+        if (size_of_field == 8)
+            return Type::key64;
+        if (size_of_field == 16)
+            return Type::keys128;
+        if (size_of_field == 32)
+            return Type::keys256;
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.");
+    }
+
+    /// If the keys fit in N bits, we will use a hash table for N-bit-packed keys
+    /// (all keys fixed-size, at most 16 or 32 bytes in total).
+    if (all_fixed && keys_bytes <= 16)
+        return Type::keys128;
+    if (all_fixed && keys_bytes <= 32)
+        return Type::keys256;
+
+    /// If there is a single string key, use a hash table of its values.
+    if (keys_size == 1)
+    {
+        auto is_string_column = [](const IColumn * column_ptr) -> bool
+        {
+            if (const auto * lc_column_ptr = typeid_cast(column_ptr))
+                return typeid_cast(lc_column_ptr->getDictionary().getNestedColumn().get());
+            return typeid_cast(column_ptr);
+        };
+
+        const auto * key_column = key_columns[0];
+        if (is_string_column(key_column) ||
+            (isColumnConst(*key_column) && is_string_column(assert_cast(key_column)->getDataColumnPtr().get())))
+            return Type::key_string;
+    }
+
+    if (keys_size == 1 && typeid_cast(key_columns[0]))
+        return Type::key_fixed_string;
+
+    /// Otherwise, will use set of cryptographic hashes of unambiguously serialized values.
+    return Type::hashed;
+}
+
+template
+static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes)
+{
+    if constexpr (is_asof_join)
+    {
+        auto key_column_copy = key_columns;
+        auto key_size_copy = key_sizes;
+        key_column_copy.pop_back();
+        key_size_copy.pop_back();
+        return KeyGetter(key_column_copy, key_size_copy, nullptr);
+    }
+    else
+        return KeyGetter(key_columns, key_sizes, nullptr);
+}
+
+/// Creates the hash table for one ON-clause inside `map`, using the method stored in `data->type`,
+/// and reserves space for `reserve_num` elements when that is non-zero.
+/// Nothing is done for JoinKind::Cross.
+/// Throws LOGICAL_ERROR if `data` is null.
+void HashJoin::dataMapInit(MapsVariant & map)
+{
+    if (kind == JoinKind::Cross)
+        return;
+
+    /// Must be checked before `data->type` is read by the lambdas below.
+    if (!data)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "HashJoin::dataMapInit called with empty data");
+
+    joinDispatchInit(kind, strictness, map);
+    joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.create(data->type); });
+
+    if (reserve_num)
+    {
+        joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.reserve(data->type, reserve_num); });
+    }
+}
+
+bool HashJoin::empty() const
+{
+    return data->type == Type::EMPTY;
+}
+
+bool HashJoin::alwaysReturnsEmptySet() const
+{
+    return isInnerOrRight(getKind()) && data->empty;
+}
+
+size_t HashJoin::getTotalRowCount() const
+{
+    if (!data)
+        return 0;
+
+    size_t res = 0;
+
+    if (data->type == Type::CROSS)
+    {
+        for (const auto & block : data->blocks)
+            res += block.rows();
+    }
+    else
+    {
+        for (const auto & map
: data->maps)
+        {
+            joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalRowCount(data->type); });
+        }
+    }
+
+    return res;
+}
+
+/// Returns the number of bytes accounted for the join: stored blocks, saved null maps, the pool,
+/// and (for every type except CROSS) the hash tables of all clauses. Returns 0 if `data` is null.
+size_t HashJoin::getTotalByteCount() const
+{
+    if (!data)
+        return 0;
+
+    /// In debug builds, recompute both sizes from the stored blocks / null maps
+    /// and verify that the incrementally maintained counters agree with them.
+#ifndef NDEBUG
+    size_t debug_blocks_allocated_size = 0;
+    for (const auto & block : data->blocks)
+        debug_blocks_allocated_size += block.allocatedBytes();
+
+    if (data->blocks_allocated_size != debug_blocks_allocated_size)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_allocated_size != debug_blocks_allocated_size ({} != {})",
+                        data->blocks_allocated_size, debug_blocks_allocated_size);
+
+    size_t debug_blocks_nullmaps_allocated_size = 0;
+    for (const auto & nullmap : data->blocks_nullmaps)
+        debug_blocks_nullmaps_allocated_size += nullmap.second->allocatedBytes();
+
+    if (data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size ({} != {})",
+                        data->blocks_nullmaps_allocated_size, debug_blocks_nullmaps_allocated_size);
+#endif
+
+    size_t res = 0;
+
+    res += data->blocks_allocated_size;
+    res += data->blocks_nullmaps_allocated_size;
+    res += data->pool.allocatedBytes();
+
+    /// Hash table sizes are added for every type except CROSS.
+    if (data->type != Type::CROSS)
+    {
+        for (const auto & map : data->maps)
+        {
+            joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalByteCountImpl(data->type); });
+        }
+    }
+    return res;
+}
+
+void HashJoin::initRightBlockStructure(Block & saved_block_sample)
+{
+    if (isCrossOrComma(kind))
+    {
+        /// cross join doesn't have keys, just add all columns
+        saved_block_sample = sample_block_with_columns_to_add.cloneEmpty();
+        return;
+    }
+
+    bool multiple_disjuncts = !table_join->oneDisjunct();
+    /// We could remove key columns for LEFT | INNER HashJoin but we should keep them for JoinSwitcher (if any).
+ bool save_key_columns = table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO) || + table_join->isEnabledAlgorithm(JoinAlgorithm::GRACE_HASH) || + isRightOrFull(kind) || + multiple_disjuncts || + table_join->getMixedJoinExpression(); + if (save_key_columns) + { + saved_block_sample = right_table_keys.cloneEmpty(); + } + else if (strictness == JoinStrictness::Asof) + { + /// Save ASOF key + saved_block_sample.insert(right_table_keys.safeGetByPosition(right_table_keys.columns() - 1)); + } + + /// Save non key columns + for (auto & column : sample_block_with_columns_to_add) + { + if (auto * col = saved_block_sample.findByName(column.name)) + *col = column; + else + saved_block_sample.insert(column); + } +} + +Block HashJoin::prepareRightBlock(const Block & block, const Block & saved_block_sample_) +{ + Block structured_block; + for (const auto & sample_column : saved_block_sample_.getColumnsWithTypeAndName()) + { + ColumnWithTypeAndName column = block.getByName(sample_column.name); + + /// There's no optimization for right side const columns. Remove constness if any. + column.column = recursiveRemoveSparse(column.column->convertToFullColumnIfConst()); + + if (column.column->lowCardinality() && !sample_column.column->lowCardinality()) + { + column.column = column.column->convertToFullColumnIfLowCardinality(); + column.type = removeLowCardinality(column.type); + } + + if (sample_column.column->isNullable()) + JoinCommon::convertColumnToNullable(column); + + structured_block.insert(std::move(column)); + } + + return structured_block; +} + +Block HashJoin::prepareRightBlock(const Block & block) const +{ + return prepareRightBlock(block, savedBlockSample()); +} + +bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) +{ + if (!data) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Join data was released"); + + /// RowRef::SizeT is uint32_t (not size_t) for hash table Cell memory efficiency. 
+ /// It's possible to split bigger blocks and insert them by parts here. But it would be a dead code. + if (unlikely(source_block_.rows() > std::numeric_limits::max())) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Too many rows in right table block for HashJoin: {}", source_block_.rows()); + + /** We do not allocate memory for stored blocks inside HashJoin, only for hash table. + * In case when we have all the blocks allocated before the first `addBlockToJoin` call, will already be quite high. + * In that case memory consumed by stored blocks will be underestimated. + */ + if (!memory_usage_before_adding_blocks) + memory_usage_before_adding_blocks = getCurrentQueryMemoryUsage(); + + Block source_block = source_block_; + if (strictness == JoinStrictness::Asof) + { + chassert(kind == JoinKind::Left || kind == JoinKind::Inner); + + /// Filter out rows with NULLs in ASOF key, nulls are not joined with anything since they are not comparable + /// We support only INNER/LEFT ASOF join, so rows with NULLs never return from the right joined table. + /// So filter them out here not to handle in implementation. 
+ const auto & asof_key_name = table_join->getOnlyClause().key_names_right.back(); + auto & asof_column = source_block.getByName(asof_key_name); + + if (asof_column.type->isNullable()) + { + /// filter rows with nulls in asof key + if (const auto * asof_const_column = typeid_cast(asof_column.column.get())) + { + if (asof_const_column->isNullAt(0)) + return false; + } + else + { + const auto & asof_column_nullable = assert_cast(*asof_column.column).getNullMapData(); + + NullMap negative_null_map(asof_column_nullable.size()); + for (size_t i = 0; i < asof_column_nullable.size(); ++i) + negative_null_map[i] = !asof_column_nullable[i]; + + for (auto & column : source_block) + column.column = column.column->filter(negative_null_map, -1); + } + } + } + + size_t rows = source_block.rows(); + + const auto & right_key_names = table_join->getAllNames(JoinTableSide::Right); + ColumnPtrMap all_key_columns(right_key_names.size()); + for (const auto & column_name : right_key_names) + { + const auto & column = source_block.getByName(column_name).column; + all_key_columns[column_name] = recursiveRemoveSparse(column->convertToFullColumnIfConst())->convertToFullColumnIfLowCardinality(); + } + + Block block_to_save = prepareRightBlock(source_block); + if (shrink_blocks) + block_to_save = block_to_save.shrinkToFit(); + + size_t max_bytes_in_join = table_join->sizeLimits().max_bytes; + size_t max_rows_in_join = table_join->sizeLimits().max_rows; + + if (kind == JoinKind::Cross && tmp_data + && (tmp_stream || (max_bytes_in_join && getTotalByteCount() + block_to_save.allocatedBytes() >= max_bytes_in_join) + || (max_rows_in_join && getTotalRowCount() + block_to_save.rows() >= max_rows_in_join))) + { + if (tmp_stream == nullptr) + { + tmp_stream = &tmp_data->createStream(right_sample_block); + } + tmp_stream->write(block_to_save); + return true; + } + + size_t total_rows = 0; + size_t total_bytes = 0; + { + if (storage_join_lock) + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, 
"addBlockToJoin called when HashJoin locked to prevent updates"); + + assertBlocksHaveEqualStructure(data->sample_block, block_to_save, "joined block"); + + size_t min_bytes_to_compress = table_join->crossJoinMinBytesToCompress(); + size_t min_rows_to_compress = table_join->crossJoinMinRowsToCompress(); + + if (kind == JoinKind::Cross + && ((min_bytes_to_compress && getTotalByteCount() >= min_bytes_to_compress) + || (min_rows_to_compress && getTotalRowCount() >= min_rows_to_compress))) + { + block_to_save = block_to_save.compress(); + have_compressed = true; + } + + data->blocks_allocated_size += block_to_save.allocatedBytes(); + data->blocks.emplace_back(std::move(block_to_save)); + Block * stored_block = &data->blocks.back(); + + if (rows) + data->empty = false; + + bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); + const auto & onexprs = table_join->getClauses(); + for (size_t onexpr_idx = 0; onexpr_idx < onexprs.size(); ++onexpr_idx) + { + ColumnRawPtrs key_columns; + for (const auto & name : onexprs[onexpr_idx].key_names_right) + key_columns.push_back(all_key_columns[name].get()); + + /// We will insert to the map only keys, where all components are not NULL. 
+ ConstNullMapPtr null_map{}; + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); + + /// If RIGHT or FULL save blocks with nulls for NotJoinedBlocks + UInt8 save_nullmap = 0; + if (isRightOrFull(kind) && null_map) + { + /// Save rows with NULL keys + for (size_t i = 0; !save_nullmap && i < null_map->size(); ++i) + save_nullmap |= (*null_map)[i]; + } + + auto join_mask_col = JoinCommon::getColumnAsMask(source_block, onexprs[onexpr_idx].condColumnNames().second); + /// Save blocks that do not hold conditions in ON section + ColumnUInt8::MutablePtr not_joined_map = nullptr; + if (!flag_per_row && isRightOrFull(kind) && join_mask_col.hasData()) + { + const auto & join_mask = join_mask_col.getData(); + /// Save rows that do not hold conditions + not_joined_map = ColumnUInt8::create(rows, 0); + for (size_t i = 0, sz = join_mask->size(); i < sz; ++i) + { + /// Condition hold, do not save row + if ((*join_mask)[i]) + continue; + + /// NULL key will be saved anyway because, do not save twice + if (save_nullmap && (*null_map)[i]) + continue; + + not_joined_map->getData()[i] = 1; + } + } + + bool is_inserted = false; + if (kind != JoinKind::Cross) + { + joinDispatch(kind, strictness, data->maps[onexpr_idx], [&](auto kind_, auto strictness_, auto & map) + { + size_t size = HashJoinMethods>::insertFromBlockImpl( + *this, + data->type, + map, + rows, + key_columns, + key_sizes[onexpr_idx], + stored_block, + null_map, + join_mask_col.getData(), + data->pool, + is_inserted); + + if (flag_per_row) + used_flags->reinit(stored_block); + else if (is_inserted) + /// Number of buckets + 1 value from zero storage + used_flags->reinit(size + 1); + }); + } + + if (!flag_per_row && save_nullmap && is_inserted) + { + data->blocks_nullmaps_allocated_size += null_map_holder->allocatedBytes(); + data->blocks_nullmaps.emplace_back(stored_block, null_map_holder); + } + + if (!flag_per_row && not_joined_map && is_inserted) + { + 
data->blocks_nullmaps_allocated_size += not_joined_map->allocatedBytes(); + data->blocks_nullmaps.emplace_back(stored_block, std::move(not_joined_map)); + } + + if (!flag_per_row && !is_inserted) + { + LOG_TRACE(log, "Skipping inserting block with {} rows", rows); + data->blocks_allocated_size -= stored_block->allocatedBytes(); + data->blocks.pop_back(); + } + + if (!check_limits) + return true; + + /// TODO: Do not calculate them every time + total_rows = getTotalRowCount(); + total_bytes = getTotalByteCount(); + } + } + + shrinkStoredBlocksToFit(total_bytes); + + return table_join->sizeLimits().check(total_rows, total_bytes, "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); +} + +void HashJoin::shrinkStoredBlocksToFit(size_t & total_bytes_in_join) +{ + if (shrink_blocks) + return; /// Already shrunk + + Int64 current_memory_usage = getCurrentQueryMemoryUsage(); + Int64 query_memory_usage_delta = current_memory_usage - memory_usage_before_adding_blocks; + Int64 max_total_bytes_for_query = memory_usage_before_adding_blocks ? table_join->getMaxMemoryUsage() : 0; + + auto max_total_bytes_in_join = table_join->sizeLimits().max_bytes; + + /** If accounted data size is more than half of `max_bytes_in_join` + * or query memory consumption growth from the beginning of adding blocks (estimation of memory consumed by join using memory tracker) + * is bigger than half of all memory available for query, + * then shrink stored blocks to fit. + */ + shrink_blocks = (max_total_bytes_in_join && total_bytes_in_join > max_total_bytes_in_join / 2) || + (max_total_bytes_for_query && query_memory_usage_delta > max_total_bytes_for_query / 2); + if (!shrink_blocks) + return; + + LOG_DEBUG(log, "Shrinking stored blocks, memory consumption is {} {} calculated by join, {} {} by memory tracker", + ReadableSize(total_bytes_in_join), max_total_bytes_in_join ? fmt::format("/ {}", ReadableSize(max_total_bytes_in_join)) : "", + ReadableSize(query_memory_usage_delta), max_total_bytes_for_query ? 
fmt::format("/ {}", ReadableSize(max_total_bytes_for_query)) : ""); + + for (auto & stored_block : data->blocks) + { + size_t old_size = stored_block.allocatedBytes(); + stored_block = stored_block.shrinkToFit(); + size_t new_size = stored_block.allocatedBytes(); + + if (old_size >= new_size) + { + if (data->blocks_allocated_size < old_size - new_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Blocks allocated size value is broken: " + "blocks_allocated_size = {}, old_size = {}, new_size = {}", + data->blocks_allocated_size, old_size, new_size); + + data->blocks_allocated_size -= old_size - new_size; + } + else + /// Sometimes after clone resized block can be bigger than original + data->blocks_allocated_size += new_size - old_size; + } + + auto new_total_bytes_in_join = getTotalByteCount(); + + Int64 new_current_memory_usage = getCurrentQueryMemoryUsage(); + + LOG_DEBUG(log, "Shrunk stored blocks {} freed ({} by memory tracker), new memory consumption is {} ({} by memory tracker)", + ReadableSize(total_bytes_in_join - new_total_bytes_in_join), ReadableSize(current_memory_usage - new_current_memory_usage), + ReadableSize(new_total_bytes_in_join), ReadableSize(new_current_memory_usage)); + + total_bytes_in_join = new_total_bytes_in_join; +} + +void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const +{ + size_t start_left_row = 0; + size_t start_right_block = 0; + std::unique_ptr reader = nullptr; + if (not_processed) + { + auto & continuation = static_cast(*not_processed); + start_left_row = continuation.left_position; + start_right_block = continuation.right_block; + reader = std::move(continuation.reader); + not_processed.reset(); + } + + size_t num_existing_columns = block.columns(); + size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); + + ColumnRawPtrs src_left_columns; + MutableColumns dst_columns; + + { + src_left_columns.reserve(num_existing_columns); + dst_columns.reserve(num_existing_columns + 
num_columns_to_add); + + for (const ColumnWithTypeAndName & left_column : block) + { + src_left_columns.push_back(left_column.column.get()); + dst_columns.emplace_back(src_left_columns.back()->cloneEmpty()); + } + + for (const ColumnWithTypeAndName & right_column : sample_block_with_columns_to_add) + dst_columns.emplace_back(right_column.column->cloneEmpty()); + + for (auto & dst : dst_columns) + dst->reserve(max_joined_block_rows); + } + + size_t rows_left = block.rows(); + size_t rows_added = 0; + for (size_t left_row = start_left_row; left_row < rows_left; ++left_row) + { + size_t block_number = 0; + + auto process_right_block = [&](const Block & block_right) + { + size_t rows_right = block_right.rows(); + rows_added += rows_right; + + for (size_t col_num = 0; col_num < num_existing_columns; ++col_num) + dst_columns[col_num]->insertManyFrom(*src_left_columns[col_num], left_row, rows_right); + + for (size_t col_num = 0; col_num < num_columns_to_add; ++col_num) + { + const IColumn & column_right = *block_right.getByPosition(col_num).column; + dst_columns[num_existing_columns + col_num]->insertRangeFrom(column_right, 0, rows_right); + } + }; + + for (const Block & block_right : data->blocks) + { + ++block_number; + if (block_number < start_right_block) + continue; + /// The following statement cannot be substituted with `process_right_block(!have_compressed ? block_right : block_right.decompress())` + /// because it will lead to copying of `block_right` even if its branch is taken (because common type of `block_right` and `block_right.decompress()` is `Block`). 
+ if (!have_compressed) + process_right_block(block_right); + else + process_right_block(block_right.decompress()); + + if (rows_added > max_joined_block_rows) + { + break; + } + } + + if (tmp_stream && rows_added <= max_joined_block_rows) + { + if (reader == nullptr) + { + tmp_stream->finishWritingAsyncSafe(); + reader = tmp_stream->getReadStream(); + } + while (auto block_right = reader->read()) + { + ++block_number; + process_right_block(block_right); + if (rows_added > max_joined_block_rows) + { + break; + } + } + + /// It means, that reader->read() returned {} + if (rows_added <= max_joined_block_rows) + { + reader.reset(); + } + } + + start_right_block = 0; + + if (rows_added > max_joined_block_rows) + { + not_processed = std::make_shared( + NotProcessedCrossJoin{{block.cloneEmpty()}, left_row, block_number + 1, std::move(reader)}); + not_processed->block.swap(block); + break; + } + } + + for (const ColumnWithTypeAndName & src_column : sample_block_with_columns_to_add) + block.insert(src_column); + + block = block.cloneWithColumns(std::move(dst_columns)); +} + +DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const +{ + size_t num_keys = data_types.size(); + if (right_table_keys.columns() != num_keys) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function joinGet{} doesn't match: passed, should be equal to {}", + toString(or_null ? 
"OrNull" : ""), toString(num_keys)); + + for (size_t i = 0; i < num_keys; ++i) + { + const auto & left_type_origin = data_types[i]; + const auto & [c2, right_type_origin, right_name] = right_table_keys.safeGetByPosition(i); + auto left_type = removeNullable(recursiveRemoveLowCardinality(left_type_origin)); + auto right_type = removeNullable(recursiveRemoveLowCardinality(right_type_origin)); + if (!left_type->equals(*right_type)) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch in joinGet key {}: " + "found type {}, while the needed type is {}", i, left_type->getName(), right_type->getName()); + } + + if (!sample_block_with_columns_to_add.has(column_name)) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "StorageJoin doesn't contain column {}", column_name); + + auto elem = sample_block_with_columns_to_add.getByName(column_name); + if (or_null && JoinCommon::canBecomeNullable(elem.type)) + elem.type = makeNullable(elem.type); + return elem.type; +} + +/// TODO: return multiple columns as named tuple +/// TODO: return array of values when strictness == JoinStrictness::All +ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block_with_columns_to_add) const +{ + bool is_valid = (strictness == JoinStrictness::Any || strictness == JoinStrictness::RightAny) + && kind == JoinKind::Left; + if (!is_valid) + throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "joinGet only supports StorageJoin of type Left Any"); + const auto & key_names_right = table_join->getOnlyClause().key_names_right; + + /// Assemble the key block with correct names. 
+ Block keys; + for (size_t i = 0; i < block.columns(); ++i) + { + auto key = block.getByPosition(i); + key.name = key_names_right[i]; + keys.insert(std::move(key)); + } + + static_assert(!MapGetter::flagged, + "joinGet are not protected from hash table changes between block processing"); + + std::vector maps_vector; + maps_vector.push_back(&std::get(data->maps[0])); + HashJoinMethods::joinBlockImpl(*this, keys, block_with_columns_to_add, maps_vector, /* is_join_get = */ true); + return keys.getByPosition(keys.columns() - 1); +} + +void HashJoin::checkTypesOfKeys(const Block & block) const +{ + for (const auto & onexpr : table_join->getClauses()) + { + JoinCommon::checkTypesOfKeys(block, onexpr.key_names_left, right_table_keys, onexpr.key_names_right); + } +} + +void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) +{ + if (!data) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot join after data has been released"); + + for (const auto & onexpr : table_join->getClauses()) + { + auto cond_column_name = onexpr.condColumnNames(); + JoinCommon::checkTypesOfKeys( + block, onexpr.key_names_left, cond_column_name.first, + right_sample_block, onexpr.key_names_right, cond_column_name.second); + } + + if (kind == JoinKind::Cross) + { + joinBlockImplCross(block, not_processed); + return; + } + + if (kind == JoinKind::Right || kind == JoinKind::Full) + { + materializeBlockInplace(block); + } + + { + std::vectormaps[0])> * > maps_vector; + for (size_t i = 0; i < table_join->getClauses().size(); ++i) + maps_vector.push_back(&data->maps[i]); + + if (joinDispatch(kind, strictness, maps_vector, [&](auto kind_, auto strictness_, auto & maps_vector_) + { + using MapType = typename MapGetter::Map; + Block remaining_block = HashJoinMethods::joinBlockImpl( + *this, block, sample_block_with_columns_to_add, maps_vector_); + if (remaining_block.rows()) + not_processed = std::make_shared(ExtraBlock{std::move(remaining_block)}); + else + not_processed.reset(); + })) + { + 
/// Joined + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong JOIN combination: {} {}", strictness, kind); + } +} + +HashJoin::~HashJoin() +{ + if (!data) + { + LOG_TEST(log, "{}Join data has been already released", instance_log_id); + return; + } + LOG_TEST( + log, + "{}Join data is being destroyed, {} bytes and {} rows in hash table", + instance_log_id, + getTotalByteCount(), + getTotalRowCount()); +} + +template +struct AdderNonJoined +{ + static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right) + { + constexpr bool mapped_asof = std::is_same_v; + [[maybe_unused]] constexpr bool mapped_one = std::is_same_v; + + if constexpr (mapped_asof) + { + /// Do nothing + } + else if constexpr (mapped_one) + { + for (size_t j = 0; j < columns_right.size(); ++j) + { + const auto & mapped_column = mapped.block->getByPosition(j).column; + columns_right[j]->insertFrom(*mapped_column, mapped.row_num); + } + + ++rows_added; + } + else + { + for (auto it = mapped.begin(); it.ok(); ++it) + { + for (size_t j = 0; j < columns_right.size(); ++j) + { + const auto & mapped_column = it->block->getByPosition(j).column; + columns_right[j]->insertFrom(*mapped_column, it->row_num); + } + + ++rows_added; + } + } + } +}; + +/// Stream from not joined earlier rows of the right table. 
+/// Based on: +/// - map offsetInternal saved in used_flags for single disjuncts +/// - flags in BlockWithFlags for multiple disjuncts +class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller +{ +public: + NotJoinedHash(const HashJoin & parent_, UInt64 max_block_size_, bool flag_per_row_) + : parent(parent_) + , max_block_size(max_block_size_) + , flag_per_row(flag_per_row_) + , current_block_start(0) + { + if (parent.data == nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot join after data has been released"); + } + + Block getEmptyBlock() override { return parent.savedBlockSample().cloneEmpty(); } + + size_t fillColumns(MutableColumns & columns_right) override + { + size_t rows_added = 0; + if (unlikely(parent.data->type == HashJoin::Type::EMPTY)) + { + rows_added = fillColumnsFromData(parent.data->blocks, columns_right); + } + else + { + auto fill_callback = [&](auto, auto, auto & map) + { + rows_added = fillColumnsFromMap(map, columns_right); + }; + + if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), fill_callback)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness '{}' (must be on of: ANY, ALL, ASOF)", parent.strictness); + } + + if (!flag_per_row) + { + fillNullsFromBlocks(columns_right, rows_added); + } + + return rows_added; + } + +private: + const HashJoin & parent; + UInt64 max_block_size; + bool flag_per_row; + + size_t current_block_start; + + std::any position; + std::optional nulls_position; + std::optional used_position; + + size_t fillColumnsFromData(const BlocksList & blocks, MutableColumns & columns_right) + { + if (!position.has_value()) + position = std::make_any(blocks.begin()); + + auto & block_it = std::any_cast(position); + auto end = blocks.end(); + + size_t rows_added = 0; + for (; block_it != end; ++block_it) + { + size_t rows_from_block = std::min(max_block_size - rows_added, block_it->rows() - current_block_start); + for (size_t j = 0; j < columns_right.size(); 
++j) + { + const auto & col = block_it->getByPosition(j).column; + columns_right[j]->insertRangeFrom(*col, current_block_start, rows_from_block); + } + rows_added += rows_from_block; + + if (rows_added >= max_block_size) + { + /// How many rows have been read + current_block_start += rows_from_block; + if (block_it->rows() <= current_block_start) + { + /// current block was fully read + ++block_it; + current_block_start = 0; + } + break; + } + current_block_start = 0; + } + return rows_added; + } + + template + size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right) + { + switch (parent.data->type) + { + #define M(TYPE) \ + case HashJoin::Type::TYPE: \ + return fillColumns(*maps.TYPE, columns_keys_and_right); + APPLY_FOR_JOIN_VARIANTS(M) + #undef M + default: + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type); + } + } + + template + size_t fillColumns(const Map & map, MutableColumns & columns_keys_and_right) + { + size_t rows_added = 0; + + if (flag_per_row) + { + if (!used_position.has_value()) + used_position = parent.data->blocks.begin(); + + auto end = parent.data->blocks.end(); + + for (auto & it = *used_position; it != end && rows_added < max_block_size; ++it) + { + const Block & mapped_block = *it; + + for (size_t row = 0; row < mapped_block.rows(); ++row) + { + if (!parent.isUsed(&mapped_block, row)) + { + for (size_t colnum = 0; colnum < columns_keys_and_right.size(); ++colnum) + { + columns_keys_and_right[colnum]->insertFrom(*mapped_block.getByPosition(colnum).column, row); + } + + ++rows_added; + } + } + } + } + else + { + using Mapped = typename Map::mapped_type; + using Iterator = typename Map::const_iterator; + + + if (!position.has_value()) + position = std::make_any(map.begin()); + + Iterator & it = std::any_cast(position); + auto end = map.end(); + + for (; it != end; ++it) + { + const Mapped & mapped = it->getMapped(); + + size_t offset = 
map.offsetInternal(it.getPtr()); + if (parent.isUsed(offset)) + continue; + AdderNonJoined::add(mapped, rows_added, columns_keys_and_right); + + if (rows_added >= max_block_size) + { + ++it; + break; + } + } + } + + return rows_added; + } + + void fillNullsFromBlocks(MutableColumns & columns_keys_and_right, size_t & rows_added) + { + if (!nulls_position.has_value()) + nulls_position = parent.data->blocks_nullmaps.begin(); + + auto end = parent.data->blocks_nullmaps.end(); + + for (auto & it = *nulls_position; it != end && rows_added < max_block_size; ++it) + { + const auto * block = it->first; + ConstNullMapPtr nullmap = nullptr; + if (it->second) + nullmap = &assert_cast(*it->second).getData(); + + for (size_t row = 0; row < block->rows(); ++row) + { + if (nullmap && (*nullmap)[row]) + { + for (size_t col = 0; col < columns_keys_and_right.size(); ++col) + columns_keys_and_right[col]->insertFrom(*block->getByPosition(col).column, row); + ++rows_added; + } + } + } + } +}; + +IBlocksStreamPtr HashJoin::getNonJoinedBlocks(const Block & left_sample_block, + const Block & result_sample_block, + UInt64 max_block_size) const +{ + if (!JoinCommon::hasNonJoinedBlocks(*table_join)) + return {}; + size_t left_columns_count = left_sample_block.columns(); + + bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); + if (!flag_per_row) + { + /// With multiple disjuncts, all keys are in sample_block_with_columns_to_add, so invariant is not held + size_t expected_columns_count = left_columns_count + required_right_keys.columns() + sample_block_with_columns_to_add.columns(); + if (expected_columns_count != result_sample_block.columns()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected number of columns in result sample block: {} instead of {} ({} + {} + {})", + result_sample_block.columns(), expected_columns_count, + left_columns_count, required_right_keys.columns(), sample_block_with_columns_to_add.columns()); + } + } + + auto non_joined = 
std::make_unique(*this, max_block_size, flag_per_row); + return std::make_unique(std::move(non_joined), result_sample_block, left_columns_count, *table_join); +} + +void HashJoin::reuseJoinedData(const HashJoin & join) +{ + data = join.data; + from_storage_join = true; + + bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); + if (flag_per_row) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StorageJoin with ORs is not supported"); + + for (auto & map : data->maps) + { + joinDispatch(kind, strictness, map, [this](auto kind_, auto strictness_, auto & map_) + { + used_flags->reinit(map_.getBufferSizeInCells(data->type) + 1); + }); + } +} + +BlocksList HashJoin::releaseJoinedBlocks(bool restructure) +{ + LOG_TRACE(log, "{}Join data is being released, {} bytes and {} rows in hash table", instance_log_id, getTotalByteCount(), getTotalRowCount()); + + BlocksList right_blocks = std::move(data->blocks); + if (!restructure) + { + data.reset(); + return right_blocks; + } + + data->maps.clear(); + data->blocks_nullmaps.clear(); + + BlocksList restored_blocks; + + /// names to positions optimization + std::vector positions; + std::vector is_nullable; + if (!right_blocks.empty()) + { + positions.reserve(right_sample_block.columns()); + const Block & tmp_block = *right_blocks.begin(); + for (const auto & sample_column : right_sample_block) + { + positions.emplace_back(tmp_block.getPositionByName(sample_column.name)); + is_nullable.emplace_back(isNullableOrLowCardinalityNullable(sample_column.type)); + } + } + + for (Block & saved_block : right_blocks) + { + Block restored_block; + for (size_t i = 0; i < positions.size(); ++i) + { + auto & column = saved_block.getByPosition(positions[i]); + correctNullabilityInplace(column, is_nullable[i]); + restored_block.insert(column); + } + restored_blocks.emplace_back(std::move(restored_block)); + } + + data.reset(); + return restored_blocks; +} + +const ColumnWithTypeAndName & HashJoin::rightAsofKeyColumn() const +{ + /// It 
should be nullable when right side is nullable + return savedBlockSample().getByName(table_join->getOnlyClause().key_names_right.back()); +} + +void HashJoin::validateAdditionalFilterExpression(ExpressionActionsPtr additional_filter_expression) +{ + if (!additional_filter_expression) + return; + + Block expression_sample_block = additional_filter_expression->getSampleBlock(); + + if (expression_sample_block.columns() != 1) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected expression in JOIN ON section. Expected single column, got '{}'", + expression_sample_block.dumpStructure()); + } + + auto type = removeNullable(expression_sample_block.getByPosition(0).type); + if (!type->equals(*std::make_shared())) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected expression in JOIN ON section. Expected boolean (UInt8), got '{}'. expression:\n{}", + expression_sample_block.getByPosition(0).type->getName(), + additional_filter_expression->dumpActions()); + } + + bool is_supported = (strictness == JoinStrictness::All) && (isInnerOrLeft(kind) || isRightOrFull(kind)); + if (!is_supported) + { + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "Non equi condition '{}' from JOIN ON section is supported only for ALL INNER/LEFT/FULL/RIGHT JOINs", + expression_sample_block.getByPosition(0).name); + } +} + +bool HashJoin::isUsed(size_t off) const +{ + return used_flags->getUsedSafe(off); +} + +bool HashJoin::isUsed(const Block * block_ptr, size_t row_idx) const +{ + return used_flags->getUsedSafe(block_ptr, row_idx); +} + + +bool HashJoin::needUsedFlagsForPerRightTableRow(std::shared_ptr table_join_) const +{ + if (!table_join_->oneDisjunct()) + return true; + /// If it'a a all right join with inequal conditions, we need to mark each row + if (table_join_->getMixedJoinExpression() && isRightOrFull(table_join_->kind())) + return true; + return false; +} + +} diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin/HashJoin.h similarity index 
91% rename from src/Interpreters/HashJoin.h rename to src/Interpreters/HashJoin/HashJoin.h index a0996556f9a..0b115b9fdbb 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin/HashJoin.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -36,47 +37,13 @@ class ExpressionActions; namespace JoinStuff { - /// Flags needed to implement RIGHT and FULL JOINs. -class JoinUsedFlags -{ - using RawBlockPtr = const Block *; - using UsedFlagsForBlock = std::vector; - - /// For multiple dijuncts each empty in hashmap stores flags for particular block - /// For single dicunct we store all flags in `nullptr` entry, index is the offset in FindResult - std::unordered_map flags; - - bool need_flags; - -public: - /// Update size for vector with flags. - /// Calling this method invalidates existing flags. - /// It can be called several times, but all of them should happen before using this structure. - template - void reinit(size_t size_); - - template - void reinit(const Block * block_ptr); - - bool getUsedSafe(size_t i) const; - bool getUsedSafe(const Block * block_ptr, size_t row_idx) const; - - template - void setUsed(const T & f); - - template - void setUsed(const Block * block, size_t row_num, size_t offset); - - template - bool getUsed(const T & f); - - template - bool setUsedOnce(const T & f); -}; - +class JoinUsedFlags; } +template +class HashJoinMethods; + /** Data structure for implementation of JOIN. * It is just a hash table: keys -> rows of joined ("right") table. * Additionally, CROSS JOIN is supported: instead of hash table, it use just set of blocks without keys. 
@@ -400,8 +367,8 @@ public: const Block & savedBlockSample() const { return data->sample_block; } - bool isUsed(size_t off) const { return used_flags.getUsedSafe(off); } - bool isUsed(const Block * block_ptr, size_t row_idx) const { return used_flags.getUsedSafe(block_ptr, row_idx); } + bool isUsed(size_t off) const; + bool isUsed(const Block * block_ptr, size_t row_idx) const; void debugKeys() const; @@ -414,6 +381,9 @@ private: friend class JoinSource; + template + friend class HashJoinMethods; + std::shared_ptr table_join; const JoinKind kind; const JoinStrictness strictness; @@ -433,8 +403,10 @@ private: /// Number of this flags equals to hashtable buffer size (plus one for zero value). /// Changes in hash table broke correspondence, /// so we must guarantee constantness of hash table during HashJoin lifetime (using method setLock) - mutable JoinStuff::JoinUsedFlags used_flags; + mutable std::unique_ptr used_flags; RightTableDataPtr data; + bool have_compressed = false; + std::vector key_sizes; /// Needed to do external cross join @@ -473,13 +445,6 @@ private: void initRightBlockStructure(Block & saved_block_sample); - template - Block joinBlockImpl( - Block & block, - const Block & block_with_columns_to_add, - const std::vector & maps_, - bool is_join_get = false) const; - void joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const; static Type chooseMethod(JoinKind kind, const ColumnRawPtrs & key_columns, Sizes & key_sizes); diff --git a/src/Interpreters/HashJoin/HashJoinMethods.h b/src/Interpreters/HashJoin/HashJoinMethods.h new file mode 100644 index 00000000000..0dfafa94efc --- /dev/null +++ b/src/Interpreters/HashJoin/HashJoinMethods.h @@ -0,0 +1,954 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int UNSUPPORTED_JOIN_KEYS; + extern const int LOGICAL_ERROR; +} + +/// Inserting an element into a hash 
table of the form `key -> reference to a string`, which will then be used by JOIN. +template +struct Inserter +{ + static ALWAYS_INLINE bool + insertOne(const HashJoin & join, HashMap & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) + { + auto emplace_result = key_getter.emplaceKey(map, i, pool); + + if (emplace_result.isInserted() || join.anyTakeLastRow()) + { + new (&emplace_result.getMapped()) typename HashMap::mapped_type(stored_block, i); + return true; + } + return false; + } + + static ALWAYS_INLINE void insertAll(const HashJoin &, HashMap & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) + { + auto emplace_result = key_getter.emplaceKey(map, i, pool); + + if (emplace_result.isInserted()) + new (&emplace_result.getMapped()) typename HashMap::mapped_type(stored_block, i); + else + { + /// The first element of the list is stored in the value of the hash table, the rest in the pool. + emplace_result.getMapped().insert({stored_block, i}, pool); + } + } + + static ALWAYS_INLINE void insertAsof( + HashJoin & join, HashMap & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, const IColumn & asof_column) + { + auto emplace_result = key_getter.emplaceKey(map, i, pool); + typename HashMap::mapped_type * time_series_map = &emplace_result.getMapped(); + + TypeIndex asof_type = *join.getAsofType(); + if (emplace_result.isInserted()) + time_series_map = new (time_series_map) typename HashMap::mapped_type(createAsofRowRef(asof_type, join.getAsofInequality())); + (*time_series_map)->insert(asof_column, stored_block, i); + } +}; + + +/// MapsTemplate is one of MapsOne, MapsAll and MapsAsof +template +class HashJoinMethods +{ +public: + static size_t insertFromBlockImpl( + HashJoin & join, + HashJoin::Type type, + MapsTemplate & maps, + size_t rows, + const ColumnRawPtrs & key_columns, + const Sizes & key_sizes, + Block * stored_block, + ConstNullMapPtr null_map, + UInt8ColumnDataPtr join_mask, + 
Arena & pool, + bool & is_inserted) + { + switch (type) + { + case HashJoin::Type::EMPTY: + [[fallthrough]]; + case HashJoin::Type::CROSS: + /// Do nothing. We will only save block, and it is enough + is_inserted = true; + return 0; + + #define M(TYPE) \ + case HashJoin::Type::TYPE: \ + return insertFromBlockImplTypeCase>::Type>(\ + join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \ + break; + + APPLY_FOR_JOIN_VARIANTS(M) + #undef M + } + } + + using MapsTemplateVector = std::vector; + + static Block joinBlockImpl( + const HashJoin & join, + Block & block, + const Block & block_with_columns_to_add, + const MapsTemplateVector & maps_, + bool is_join_get = false) + { + constexpr JoinFeatures join_features; + + std::vector join_on_keys; + const auto & onexprs = join.table_join->getClauses(); + for (size_t i = 0; i < onexprs.size(); ++i) + { + const auto & key_names = !is_join_get ? onexprs[i].key_names_left : onexprs[i].key_names_right; + join_on_keys.emplace_back(block, key_names, onexprs[i].condColumnNames().first, join.key_sizes[i]); + } + size_t existing_columns = block.columns(); + + /** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized. + * Because if they are constants, then in the "not joined" rows, they may have different values + * - default values, which can differ from the values of these constants. + */ + if constexpr (join_features.right || join_features.full) + { + materializeBlockInplace(block); + } + + /** For LEFT/INNER JOIN, the saved blocks do not contain keys. + * For FULL/RIGHT JOIN, the saved blocks contain keys; + * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. 
+ * For ASOF, the last column is used as the ASOF column + */ + AddedColumns added_columns( + block, + block_with_columns_to_add, + join.savedBlockSample(), + join, + std::move(join_on_keys), + join.table_join->getMixedJoinExpression(), + join_features.is_asof_join, + is_join_get); + + bool has_required_right_keys = (join.required_right_keys.columns() != 0); + added_columns.need_filter = join_features.need_filter || has_required_right_keys; + added_columns.max_joined_block_rows = join.max_joined_block_rows; + if (!added_columns.max_joined_block_rows) + added_columns.max_joined_block_rows = std::numeric_limits::max(); + else + added_columns.reserve(join_features.need_replication); + + size_t num_joined = switchJoinRightColumns(maps_, added_columns, join.data->type, *join.used_flags); + /// Do not hold memory for join_on_keys anymore + added_columns.join_on_keys.clear(); + Block remaining_block = sliceBlock(block, num_joined); + + added_columns.buildOutput(); + for (size_t i = 0; i < added_columns.size(); ++i) + block.insert(added_columns.moveColumn(i)); + + std::vector right_keys_to_replicate [[maybe_unused]]; + + if constexpr (join_features.need_filter) + { + /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. + for (size_t i = 0; i < existing_columns; ++i) + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(added_columns.filter, -1); + + /// Add join key columns from right block if needed using value from left table because of equality + for (size_t i = 0; i < join.required_right_keys.columns(); ++i) + { + const auto & right_key = join.required_right_keys.getByPosition(i); + /// asof column is already in block. 
+ if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) + continue; + + const auto & left_column = block.getByName(join.required_right_keys_sources[i]); + const auto & right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); + auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column); + block.insert(std::move(right_col)); + } + } + else if (has_required_right_keys) + { + /// Add join key columns from right block if needed. + for (size_t i = 0; i < join.required_right_keys.columns(); ++i) + { + const auto & right_key = join.required_right_keys.getByPosition(i); + auto right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); + /// asof column is already in block. + if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) + continue; + + const auto & left_column = block.getByName(join.required_right_keys_sources[i]); + auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &added_columns.filter); + block.insert(std::move(right_col)); + + if constexpr (join_features.need_replication) + right_keys_to_replicate.push_back(block.getPositionByName(right_col_name)); + } + } + + if constexpr (join_features.need_replication) + { + std::unique_ptr & offsets_to_replicate = added_columns.offsets_to_replicate; + + /// If ALL ... JOIN - we replicate all the columns except the new ones. 
+ for (size_t i = 0; i < existing_columns; ++i) + { + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); + } + + /// Replicate additional right keys + for (size_t pos : right_keys_to_replicate) + { + block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate); + } + } + + return remaining_block; + } + +private: + template + static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes) + { + if constexpr (is_asof_join) + { + auto key_column_copy = key_columns; + auto key_size_copy = key_sizes; + key_column_copy.pop_back(); + key_size_copy.pop_back(); + return KeyGetter(key_column_copy, key_size_copy, nullptr); + } + else + return KeyGetter(key_columns, key_sizes, nullptr); + } + + template + static size_t NO_INLINE insertFromBlockImplTypeCase( + HashJoin & join, HashMap & map, size_t rows, const ColumnRawPtrs & key_columns, + const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) + { + [[maybe_unused]] constexpr bool mapped_one = std::is_same_v; + constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; + + const IColumn * asof_column [[maybe_unused]] = nullptr; + if constexpr (is_asof_join) + asof_column = key_columns.back(); + + auto key_getter = createKeyGetter(key_columns, key_sizes); + + /// For ALL and ASOF join always insert values + is_inserted = !mapped_one || is_asof_join; + + for (size_t i = 0; i < rows; ++i) + { + if (null_map && (*null_map)[i]) + { + /// nulls are not inserted into hash table, + /// keep them for RIGHT and FULL joins + is_inserted = true; + continue; + } + + /// Check condition for right table from ON section + if (join_mask && !(*join_mask)[i]) + continue; + + if constexpr (is_asof_join) + Inserter::insertAsof(join, map, key_getter, stored_block, i, pool, *asof_column); + else if constexpr (mapped_one) + is_inserted 
|= Inserter::insertOne(join, map, key_getter, stored_block, i, pool); + else + Inserter::insertAll(join, map, key_getter, stored_block, i, pool); + } + return map.getBufferSizeInCells(); + } + + template + static size_t switchJoinRightColumns( + const std::vector & mapv, + AddedColumns & added_columns, + HashJoin::Type type, + JoinStuff::JoinUsedFlags & used_flags) + { + constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; + switch (type) + { + case HashJoin::Type::EMPTY: { + if constexpr (!is_asof_join) + { + using KeyGetter = KeyGetterEmpty; + std::vector key_getter_vector; + key_getter_vector.emplace_back(); + + using MapTypeVal = typename KeyGetter::MappedType; + std::vector a_map_type_vector; + a_map_type_vector.emplace_back(); + return joinRightColumnsSwitchNullability( + std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); + } + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys. Type: {}", type); + } + #define M(TYPE) \ + case HashJoin::Type::TYPE: \ + { \ + using MapTypeVal = const typename std::remove_reference_t::element_type; \ + using KeyGetter = typename KeyGetterForType::Type; \ + std::vector a_map_type_vector(mapv.size()); \ + std::vector key_getter_vector; \ + for (size_t d = 0; d < added_columns.join_on_keys.size(); ++d) \ + { \ + const auto & join_on_key = added_columns.join_on_keys[d]; \ + a_map_type_vector[d] = mapv[d]->TYPE.get(); \ + key_getter_vector.push_back(std::move(createKeyGetter(join_on_key.key_columns, join_on_key.key_sizes))); \ + } \ + return joinRightColumnsSwitchNullability( \ + std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); \ + } + APPLY_FOR_JOIN_VARIANTS(M) + #undef M + + default: + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", type); + } + } + + template + static size_t joinRightColumnsSwitchNullability( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, 
+ JoinStuff::JoinUsedFlags & used_flags) + { + if (added_columns.need_filter) + { + return joinRightColumnsSwitchMultipleDisjuncts( + std::forward>(key_getter_vector), mapv, added_columns, used_flags); + } + else + { + return joinRightColumnsSwitchMultipleDisjuncts( + std::forward>(key_getter_vector), mapv, added_columns, used_flags); + } + } + + template + static size_t joinRightColumnsSwitchMultipleDisjuncts( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags) + { + constexpr JoinFeatures join_features; + if constexpr (join_features.is_all_join) + { + if (added_columns.additional_filter_expression) + { + bool mark_per_row_used = join_features.right || join_features.full || mapv.size() > 1; + return joinRightColumnsWithAddtitionalFilter( + std::forward>(key_getter_vector), + mapv, + added_columns, + used_flags, + need_filter, + join_features.need_flags, + join_features.add_missing, + mark_per_row_used); + } + } + + if (added_columns.additional_filter_expression) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Additional filter expression is not supported for this JOIN"); + + return mapv.size() > 1 ? joinRightColumns( + std::forward>(key_getter_vector), mapv, added_columns, used_flags) + : joinRightColumns( + std::forward>(key_getter_vector), mapv, added_columns, used_flags); + } + + /// Joins right table columns which indexes are present in right_indexes using specified map. + /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). 
+ template + static size_t joinRightColumns( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags) + { + constexpr JoinFeatures join_features; + + size_t rows = added_columns.rows_to_add; + if constexpr (need_filter) + added_columns.filter = IColumn::Filter(rows, 0); + + Arena pool; + + if constexpr (join_features.need_replication) + added_columns.offsets_to_replicate = std::make_unique(rows); + + IColumn::Offset current_offset = 0; + size_t max_joined_block_rows = added_columns.max_joined_block_rows; + size_t i = 0; + for (; i < rows; ++i) + { + if constexpr (join_features.need_replication) + { + if (unlikely(current_offset >= max_joined_block_rows)) + { + added_columns.offsets_to_replicate->resize_assume_reserved(i); + added_columns.filter.resize_assume_reserved(i); + break; + } + } + + bool right_row_found = false; + + KnownRowsHolder known_rows; + for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) + { + const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; + if (join_keys.null_map && (*join_keys.null_map)[i]) + continue; + + bool row_acceptable = !join_keys.isRowFiltered(i); + using FindResult = typename KeyGetter::FindResult; + auto find_result = row_acceptable ? 
key_getter_vector[onexpr_idx].findKey(*(mapv[onexpr_idx]), i, pool) : FindResult(); + + if (find_result.isFound()) + { + right_row_found = true; + auto & mapped = find_result.getMapped(); + if constexpr (join_features.is_asof_join) + { + const IColumn & left_asof_key = added_columns.leftAsofKey(); + + auto row_ref = mapped->findAsof(left_asof_key, i); + if (row_ref.block) + { + setUsed(added_columns.filter, i); + if constexpr (flag_per_row) + used_flags.template setUsed(row_ref.block, row_ref.row_num, 0); + else + used_flags.template setUsed(find_result); + + added_columns.appendFromBlock(*row_ref.block, row_ref.row_num, join_features.add_missing); + } + else + addNotFoundRow(added_columns, current_offset); + } + else if constexpr (join_features.is_all_join) + { + setUsed(added_columns.filter, i); + used_flags.template setUsed(find_result); + auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; + addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); + } + else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right) + { + /// Use first appeared left key + it needs left columns replication + bool used_once = used_flags.template setUsedOnce(find_result); + if (used_once) + { + auto used_flags_opt = join_features.need_flags ? 
&used_flags : nullptr; + setUsed(added_columns.filter, i); + addFoundRowAll( + mapped, added_columns, current_offset, known_rows, used_flags_opt); + } + } + else if constexpr (join_features.is_any_join && KIND == JoinKind::Inner) + { + bool used_once = used_flags.template setUsedOnce(find_result); + + /// Use first appeared left key only + if (used_once) + { + setUsed(added_columns.filter, i); + added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); + } + + break; + } + else if constexpr (join_features.is_any_join && join_features.full) + { + /// TODO + } + else if constexpr (join_features.is_anti_join) + { + if constexpr (join_features.right && join_features.need_flags) + used_flags.template setUsed(find_result); + } + else /// ANY LEFT, SEMI LEFT, old ANY (RightAny) + { + setUsed(added_columns.filter, i); + used_flags.template setUsed(find_result); + added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); + + if (join_features.is_any_or_semi_join) + { + break; + } + } + } + } + + if (!right_row_found) + { + if constexpr (join_features.is_anti_join && join_features.left) + setUsed(added_columns.filter, i); + addNotFoundRow(added_columns, current_offset); + } + + if constexpr (join_features.need_replication) + { + (*added_columns.offsets_to_replicate)[i] = current_offset; + } + } + + added_columns.applyLazyDefaults(); + return i; + } + + template + static void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]]) + { + if constexpr (need_filter) + filter[pos] = 1; + } + + template + static ColumnPtr buildAdditionalFilter( + size_t left_start_row, + const std::vector & selected_rows, + const std::vector & row_replicate_offset, + AddedColumns & added_columns) + { + ColumnPtr result_column; + do + { + if (selected_rows.empty()) + { + result_column = ColumnUInt8::create(); + break; + } + const Block & sample_right_block = *selected_rows.begin()->block; + if (!sample_right_block 
|| !added_columns.additional_filter_expression) + { + auto filter = ColumnUInt8::create(); + filter->insertMany(1, selected_rows.size()); + result_column = std::move(filter); + break; + } + + auto required_cols = added_columns.additional_filter_expression->getRequiredColumnsWithTypes(); + if (required_cols.empty()) + { + Block block; + added_columns.additional_filter_expression->execute(block); + result_column = block.getByPosition(0).column->cloneResized(selected_rows.size()); + break; + } + NameSet required_column_names; + for (auto & col : required_cols) + required_column_names.insert(col.name); + + Block executed_block; + size_t right_col_pos = 0; + for (const auto & col : sample_right_block.getColumnsWithTypeAndName()) + { + if (required_column_names.contains(col.name)) + { + auto new_col = col.column->cloneEmpty(); + for (const auto & selected_row : selected_rows) + { + const auto & src_col = selected_row.block->getByPosition(right_col_pos); + new_col->insertFrom(*src_col.column, selected_row.row_num); + } + executed_block.insert({std::move(new_col), col.type, col.name}); + } + right_col_pos += 1; + } + if (!executed_block) + { + result_column = ColumnUInt8::create(); + break; + } + + for (const auto & col_name : required_column_names) + { + const auto * src_col = added_columns.left_block.findByName(col_name); + if (!src_col) + continue; + auto new_col = src_col->column->cloneEmpty(); + size_t prev_left_offset = 0; + for (size_t i = 1; i < row_replicate_offset.size(); ++i) + { + const size_t & left_offset = row_replicate_offset[i]; + size_t rows = left_offset - prev_left_offset; + if (rows) + new_col->insertManyFrom(*src_col->column, left_start_row + i - 1, rows); + prev_left_offset = left_offset; + } + executed_block.insert({std::move(new_col), src_col->type, col_name}); + } + if (!executed_block) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "required columns: [{}], but not found any in left/right table. 
right table: {}, left table: {}", + required_cols.toString(), + sample_right_block.dumpNames(), + added_columns.left_block.dumpNames()); + } + + for (const auto & col : executed_block.getColumnsWithTypeAndName()) + if (!col.column || !col.type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal nullptr column in input block: {}", executed_block.dumpStructure()); + + added_columns.additional_filter_expression->execute(executed_block); + result_column = executed_block.getByPosition(0).column->convertToFullColumnIfConst(); + executed_block.clear(); + } while (false); + + result_column = result_column->convertToFullIfNeeded(); + if (result_column->isNullable()) + { + /// Convert Nullable(UInt8) to UInt8 ensuring that nulls are zeros + /// Trying to avoid copying data, since we are the only owner of the column. + ColumnPtr mask_column = assert_cast(*result_column).getNullMapColumnPtr(); + + MutableColumnPtr mutable_column; + { + ColumnPtr nested_column = assert_cast(*result_column).getNestedColumnPtr(); + result_column.reset(); + mutable_column = IColumn::mutate(std::move(nested_column)); + } + + auto & column_data = assert_cast(*mutable_column).getData(); + const auto & mask_column_data = assert_cast(*mask_column).getData(); + for (size_t i = 0; i < column_data.size(); ++i) + { + if (mask_column_data[i]) + column_data[i] = 0; + } + return mutable_column; + } + return result_column; + } + + /// First to collect all matched rows refs by join keys, then filter out rows which are not true in additional filter expression. 
+ template + static size_t joinRightColumnsWithAddtitionalFilter( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]], + bool need_filter [[maybe_unused]], + bool need_flags [[maybe_unused]], + bool add_missing [[maybe_unused]], + bool flag_per_row [[maybe_unused]]) + { + size_t left_block_rows = added_columns.rows_to_add; + if (need_filter) + added_columns.filter = IColumn::Filter(left_block_rows, 0); + + std::unique_ptr pool; + + if constexpr (need_replication) + added_columns.offsets_to_replicate = std::make_unique(left_block_rows); + + std::vector row_replicate_offset; + row_replicate_offset.reserve(left_block_rows); + + using FindResult = typename KeyGetter::FindResult; + size_t max_joined_block_rows = added_columns.max_joined_block_rows; + size_t left_row_iter = 0; + PreSelectedRows selected_rows; + selected_rows.reserve(left_block_rows); + std::vector find_results; + find_results.reserve(left_block_rows); + bool exceeded_max_block_rows = false; + IColumn::Offset total_added_rows = 0; + IColumn::Offset current_added_rows = 0; + + auto collect_keys_matched_rows_refs = [&]() + { + pool = std::make_unique(); + find_results.clear(); + row_replicate_offset.clear(); + row_replicate_offset.push_back(0); + current_added_rows = 0; + selected_rows.clear(); + for (; left_row_iter < left_block_rows; ++left_row_iter) + { + if constexpr (need_replication) + { + if (unlikely(total_added_rows + current_added_rows >= max_joined_block_rows)) + { + break; + } + } + KnownRowsHolder all_flag_known_rows; + KnownRowsHolder single_flag_know_rows; + for (size_t join_clause_idx = 0; join_clause_idx < added_columns.join_on_keys.size(); ++join_clause_idx) + { + const auto & join_keys = added_columns.join_on_keys[join_clause_idx]; + if (join_keys.null_map && (*join_keys.null_map)[left_row_iter]) + continue; + + bool row_acceptable = !join_keys.isRowFiltered(left_row_iter); + auto 
find_result = row_acceptable + ? key_getter_vector[join_clause_idx].findKey(*(mapv[join_clause_idx]), left_row_iter, *pool) + : FindResult(); + + if (find_result.isFound()) + { + auto & mapped = find_result.getMapped(); + find_results.push_back(find_result); + if (flag_per_row) + addFoundRowAll(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr); + else + addFoundRowAll(mapped, selected_rows, current_added_rows, single_flag_know_rows, nullptr); + } + } + row_replicate_offset.push_back(current_added_rows); + } + }; + + auto copy_final_matched_rows = [&](size_t left_start_row, ColumnPtr filter_col) + { + const PaddedPODArray & filter_flags = assert_cast(*filter_col).getData(); + + size_t prev_replicated_row = 0; + auto selected_right_row_it = selected_rows.begin(); + size_t find_result_index = 0; + for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i) + { + bool any_matched = false; + /// For all right join, flag_per_row is true, we need mark used flags for each row. 
+ if (flag_per_row) + { + for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) + { + if (filter_flags[replicated_row]) + { + any_matched = true; + added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing); + total_added_rows += 1; + if (need_flags) + used_flags.template setUsed(selected_right_row_it->block, selected_right_row_it->row_num, 0); + } + ++selected_right_row_it; + } + } + else + { + for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) + { + if (filter_flags[replicated_row]) + { + any_matched = true; + added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing); + total_added_rows += 1; + } + ++selected_right_row_it; + } + } + if (!any_matched) + { + if (add_missing) + addNotFoundRow(added_columns, total_added_rows); + else + addNotFoundRow(added_columns, total_added_rows); + } + else + { + if (!flag_per_row && need_flags) + used_flags.template setUsed(find_results[find_result_index]); + if (need_filter) + setUsed(added_columns.filter, left_start_row + i - 1); + if (add_missing) + added_columns.applyLazyDefaults(); + } + find_result_index += (prev_replicated_row != row_replicate_offset[i]); + + if constexpr (need_replication) + { + (*added_columns.offsets_to_replicate)[left_start_row + i - 1] = total_added_rows; + } + prev_replicated_row = row_replicate_offset[i]; + } + }; + + while (left_row_iter < left_block_rows && !exceeded_max_block_rows) + { + auto left_start_row = left_row_iter; + collect_keys_matched_rows_refs(); + if (selected_rows.size() != current_added_rows || row_replicate_offset.size() != left_row_iter - left_start_row + 1) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Sizes are mismatched. 
selected_rows.size:{}, current_added_rows:{}, row_replicate_offset.size:{}, left_row_iter: {}, " + "left_start_row: {}", + selected_rows.size(), + current_added_rows, + row_replicate_offset.size(), + left_row_iter, + left_start_row); + } + auto filter_col = buildAdditionalFilter(left_start_row, selected_rows, row_replicate_offset, added_columns); + copy_final_matched_rows(left_start_row, filter_col); + + if constexpr (need_replication) + { + // Add a check for current_added_rows to avoid run the filter expression on too small size batch. + if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024) + exceeded_max_block_rows = true; + } + } + + if constexpr (need_replication) + { + added_columns.offsets_to_replicate->resize_assume_reserved(left_row_iter); + added_columns.filter.resize_assume_reserved(left_row_iter); + } + added_columns.applyLazyDefaults(); + return left_row_iter; + } + + /// Cut first num_rows rows from block in place and returns block with remaining rows + static Block sliceBlock(Block & block, size_t num_rows) + { + size_t total_rows = block.rows(); + if (num_rows >= total_rows) + return {}; + size_t remaining_rows = total_rows - num_rows; + Block remaining_block = block.cloneEmpty(); + for (size_t i = 0; i < block.columns(); ++i) + { + auto & col = block.getByPosition(i); + remaining_block.getByPosition(i).column = col.column->cut(num_rows, remaining_rows); + col.column = col.column->cut(0, num_rows); + } + return remaining_block; + } + + /** Since we do not store right key columns, + * this function is used to copy left key columns to right key columns. + * If the user requests some right columns, we just copy left key columns to right, since they are equal. + * Example: SELECT t1.key, t2.key FROM t1 FULL JOIN t2 ON t1.key = t2.key; + * In that case for matched rows in t2.key we will use values from t1.key. + * However, in some cases we might need to adjust the type of column, e.g. 
t1.key :: LowCardinality(String) and t2.key :: String + * Also, the nullability of the column might be different. + * Returns the right column after with necessary adjustments. + */ + static ColumnWithTypeAndName copyLeftKeyColumnToRight( + const DataTypePtr & right_key_type, + const String & renamed_right_column, + const ColumnWithTypeAndName & left_column, + const IColumn::Filter * null_map_filter = nullptr) + { + ColumnWithTypeAndName right_column = left_column; + right_column.name = renamed_right_column; + + if (null_map_filter) + right_column.column = JoinCommon::filterWithBlanks(right_column.column, *null_map_filter); + + bool should_be_nullable = isNullableOrLowCardinalityNullable(right_key_type); + if (null_map_filter) + correctNullabilityInplace(right_column, should_be_nullable, *null_map_filter); + else + correctNullabilityInplace(right_column, should_be_nullable); + + if (!right_column.type->equals(*right_key_type)) + { + right_column.column = castColumnAccurate(right_column, right_key_type); + right_column.type = right_key_type; + } + + right_column.column = right_column.column->convertToFullColumnIfConst(); + return right_column; + } + + static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable) + { + if (nullable) + { + JoinCommon::convertColumnToNullable(column); + } + else + { + /// We have to replace values masked by NULLs with defaults. 
+ if (column.column) + if (const auto * nullable_column = checkAndGetColumn(&*column.column)) + column.column = JoinCommon::filterWithBlanks(column.column, nullable_column->getNullMapColumn().getData(), true); + + JoinCommon::removeColumnNullability(column); + } + } + + static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map) + { + if (nullable) + { + JoinCommon::convertColumnToNullable(column); + if (column.type->isNullable() && !negative_null_map.empty()) + { + MutableColumnPtr mutable_column = IColumn::mutate(std::move(column.column)); + assert_cast(*mutable_column).applyNegatedNullMap(negative_null_map); + column.column = std::move(mutable_column); + } + } + else + JoinCommon::removeColumnNullability(column); + } +}; + +/// Instantiate template class ahead in different .cpp files to avoid `too large translation unit`. +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; + +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; + +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; + +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +extern template class HashJoinMethods; +} + diff --git a/src/Interpreters/HashJoin/InnerHashJoin.cpp b/src/Interpreters/HashJoin/InnerHashJoin.cpp new file mode 100644 
index 00000000000..85aedf3a8e5 --- /dev/null +++ b/src/Interpreters/HashJoin/InnerHashJoin.cpp @@ -0,0 +1,12 @@ + +#include + +namespace DB +{ +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +} diff --git a/src/Interpreters/HashJoin/JoinFeatures.h b/src/Interpreters/HashJoin/JoinFeatures.h new file mode 100644 index 00000000000..2f2bd1e29a2 --- /dev/null +++ b/src/Interpreters/HashJoin/JoinFeatures.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include +namespace DB +{ +template +struct JoinFeatures +{ + static constexpr bool is_any_join = STRICTNESS == JoinStrictness::Any; + static constexpr bool is_any_or_semi_join = STRICTNESS == JoinStrictness::Any || STRICTNESS == JoinStrictness::RightAny || (STRICTNESS == JoinStrictness::Semi && KIND == JoinKind::Left); + static constexpr bool is_all_join = STRICTNESS == JoinStrictness::All; + static constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; + static constexpr bool is_semi_join = STRICTNESS == JoinStrictness::Semi; + static constexpr bool is_anti_join = STRICTNESS == JoinStrictness::Anti; + + static constexpr bool left = KIND == JoinKind::Left; + static constexpr bool right = KIND == JoinKind::Right; + static constexpr bool inner = KIND == JoinKind::Inner; + static constexpr bool full = KIND == JoinKind::Full; + + static constexpr bool need_replication = is_all_join || (is_any_join && right) || (is_semi_join && right); + static constexpr bool need_filter = !need_replication && (inner || right || (is_semi_join && left) || (is_anti_join && left)); + static constexpr bool add_missing = (left || full) && !is_semi_join; + + static constexpr bool need_flags = MapGetter::flagged; +}; + +} diff --git a/src/Interpreters/HashJoin/JoinUsedFlags.h b/src/Interpreters/HashJoin/JoinUsedFlags.h new file mode 100644 index 00000000000..bd41ba2073f --- /dev/null +++ 
b/src/Interpreters/HashJoin/JoinUsedFlags.h @@ -0,0 +1,154 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace JoinStuff +{ +/// Flags needed to implement RIGHT and FULL JOINs. +class JoinUsedFlags +{ + using RawBlockPtr = const Block *; + using UsedFlagsForBlock = std::vector; + + /// For multiple dijuncts each empty in hashmap stores flags for particular block + /// For single dicunct we store all flags in `nullptr` entry, index is the offset in FindResult + std::unordered_map flags; + + bool need_flags; + +public: + /// Update size for vector with flags. + /// Calling this method invalidates existing flags. + /// It can be called several times, but all of them should happen before using this structure. + template + void reinit(size_t size) + { + if constexpr (MapGetter::flagged) + { + assert(flags[nullptr].size() <= size); + need_flags = true; + // For one disjunct clause case, we don't need to reinit each time we call addBlockToJoin. + // and there is no value inserted in this JoinUsedFlags before addBlockToJoin finish. + // So we reinit only when the hash table is rehashed to a larger size. + if (flags.empty() || flags[nullptr].size() < size) [[unlikely]] + { + flags[nullptr] = std::vector(size); + } + } + } + + template + void reinit(const Block * block_ptr) + { + if constexpr (MapGetter::flagged) + { + assert(flags[block_ptr].size() <= block_ptr->rows()); + need_flags = true; + flags[block_ptr] = std::vector(block_ptr->rows()); + } + } + + bool getUsedSafe(size_t i) const + { + return getUsedSafe(nullptr, i); + } + bool getUsedSafe(const Block * block_ptr, size_t row_idx) const + { + if (auto it = flags.find(block_ptr); it != flags.end()) + return it->second[row_idx].load(); + return !need_flags; + } + + template + void setUsed(const FindResult & f) + { + if constexpr (!use_flags) + return; + + /// Could be set simultaneously from different threads. 
+ if constexpr (flag_per_row) + { + auto & mapped = f.getMapped(); + flags[mapped.block][mapped.row_num].store(true, std::memory_order_relaxed); + } + else + { + flags[nullptr][f.getOffset()].store(true, std::memory_order_relaxed); + } + } + + template + void setUsed(const Block * block, size_t row_num, size_t offset) + { + if constexpr (!use_flags) + return; + + /// Could be set simultaneously from different threads. + if constexpr (flag_per_row) + { + flags[block][row_num].store(true, std::memory_order_relaxed); + } + else + { + flags[nullptr][offset].store(true, std::memory_order_relaxed); + } + } + + template + bool getUsed(const FindResult & f) + { + if constexpr (!use_flags) + return true; + + if constexpr (flag_per_row) + { + auto & mapped = f.getMapped(); + return flags[mapped.block][mapped.row_num].load(); + } + else + { + return flags[nullptr][f.getOffset()].load(); + } + + } + + template + bool setUsedOnce(const FindResult & f) + { + if constexpr (!use_flags) + return true; + + if constexpr (flag_per_row) + { + auto & mapped = f.getMapped(); + + /// fast check to prevent heavy CAS with seq_cst order + if (flags[mapped.block][mapped.row_num].load(std::memory_order_relaxed)) + return false; + + bool expected = false; + return flags[mapped.block][mapped.row_num].compare_exchange_strong(expected, true); + } + else + { + auto off = f.getOffset(); + + /// fast check to prevent heavy CAS with seq_cst order + if (flags[nullptr][off].load(std::memory_order_relaxed)) + return false; + + bool expected = false; + return flags[nullptr][off].compare_exchange_strong(expected, true); + } + + } +}; + +} +} diff --git a/src/Interpreters/HashJoin/KeyGetter.h b/src/Interpreters/HashJoin/KeyGetter.h new file mode 100644 index 00000000000..35ff2bb6eb5 --- /dev/null +++ b/src/Interpreters/HashJoin/KeyGetter.h @@ -0,0 +1,73 @@ +#pragma once +#include + + +namespace DB +{ +template +class KeyGetterEmpty +{ +public: + struct MappedType + { + using mapped_type = Mapped; + }; + + 
using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl; + + KeyGetterEmpty() = default; + + FindResult findKey(MappedType, size_t, const Arena &) { return FindResult(); } +}; + +template +struct KeyGetterForTypeImpl; + +constexpr bool use_offset = true; + +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodOneNumber; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodOneNumber; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodOneNumber; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodOneNumber; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodString; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodFixedString; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodKeysFixed; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodKeysFixed; +}; +template struct KeyGetterForTypeImpl +{ + using Type = ColumnsHashing::HashMethodHashed; +}; + +template +struct KeyGetterForType +{ + using Value = typename Data::value_type; + using Mapped_t = typename Data::mapped_type; + using Mapped = std::conditional_t, const Mapped_t, Mapped_t>; + using Type = typename KeyGetterForTypeImpl::Type; +}; +} diff --git a/src/Interpreters/HashJoin/KnowRowsHolder.h b/src/Interpreters/HashJoin/KnowRowsHolder.h new file mode 100644 index 00000000000..d51c96893c5 --- /dev/null +++ b/src/Interpreters/HashJoin/KnowRowsHolder.h @@ -0,0 +1,148 @@ +#pragma once +#include +#include +#include +#include +namespace DB +{ + +template +class KnownRowsHolder; + +/// Keep already joined rows to prevent duplication if many disjuncts +/// if for a particular pair of rows condition looks like TRUE or TRUE or TRUE +/// we want to have it once in resultset +template<> +class KnownRowsHolder +{ +public: + using Type = 
std::pair; + +private: + static const size_t MAX_LINEAR = 16; // threshold to switch from Array to Set + using ArrayHolder = std::array; + using SetHolder = std::set; + using SetHolderPtr = std::unique_ptr; + + ArrayHolder array_holder; + SetHolderPtr set_holder_ptr; + + size_t items; + +public: + KnownRowsHolder() + : items(0) + { + } + + + template + void add(InputIt from, InputIt to) + { + const size_t new_items = std::distance(from, to); + if (items + new_items <= MAX_LINEAR) + { + std::copy(from, to, &array_holder[items]); + } + else + { + if (items <= MAX_LINEAR) + { + set_holder_ptr = std::make_unique(); + set_holder_ptr->insert(std::cbegin(array_holder), std::cbegin(array_holder) + items); + } + set_holder_ptr->insert(from, to); + } + items += new_items; + } + + template + bool isKnown(const Needle & needle) + { + return items <= MAX_LINEAR + ? std::find(std::cbegin(array_holder), std::cbegin(array_holder) + items, needle) != std::cbegin(array_holder) + items + : set_holder_ptr->find(needle) != set_holder_ptr->end(); + } +}; + +template<> +class KnownRowsHolder +{ +public: + template + void add(InputIt, InputIt) + { + } + + template + static bool isKnown(const Needle &) + { + return false; + } +}; + +template +using FindResultImpl = ColumnsHashing::columns_hashing_impl::FindResultImpl; + + +template +void addFoundRowAll( + const typename Map::mapped_type & mapped, + AddedColumns & added, + IColumn::Offset & current_offset, + KnownRowsHolder & known_rows [[maybe_unused]], + JoinStuff::JoinUsedFlags * used_flags [[maybe_unused]]) +{ + if constexpr (add_missing) + added.applyLazyDefaults(); + + if constexpr (flag_per_row) + { + std::unique_ptr::Type>> new_known_rows_ptr; + + for (auto it = mapped.begin(); it.ok(); ++it) + { + if (!known_rows.isKnown(std::make_pair(it->block, it->row_num))) + { + added.appendFromBlock(*it->block, it->row_num, false); + ++current_offset; + if (!new_known_rows_ptr) + { + new_known_rows_ptr = std::make_unique::Type>>(); + } + 
new_known_rows_ptr->push_back(std::make_pair(it->block, it->row_num)); + if (used_flags) + { + used_flags->JoinStuff::JoinUsedFlags::setUsedOnce( + FindResultImpl(*it, true, 0)); + } + } + } + + if (new_known_rows_ptr) + { + known_rows.add(std::cbegin(*new_known_rows_ptr), std::cend(*new_known_rows_ptr)); + } + } + else + { + for (auto it = mapped.begin(); it.ok(); ++it) + { + added.appendFromBlock(*it->block, it->row_num, false); + ++current_offset; + } + } +} + +template +void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]]) +{ + if constexpr (add_missing) + { + added.appendDefaultRow(); + if constexpr (need_offset) + ++current_offset; + } +} + +} diff --git a/src/Interpreters/HashJoin/LeftHashJoin.cpp b/src/Interpreters/HashJoin/LeftHashJoin.cpp new file mode 100644 index 00000000000..69e17ff70bd --- /dev/null +++ b/src/Interpreters/HashJoin/LeftHashJoin.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +} diff --git a/src/Interpreters/HashJoin/RightHashJoin.cpp b/src/Interpreters/HashJoin/RightHashJoin.cpp new file mode 100644 index 00000000000..8e304754f5c --- /dev/null +++ b/src/Interpreters/HashJoin/RightHashJoin.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +template class HashJoinMethods; +} diff --git a/src/Interpreters/ITokenExtractor.cpp b/src/Interpreters/ITokenExtractor.cpp index 1c5d0d4b6d4..f0bf90fcb5c 100644 --- a/src/Interpreters/ITokenExtractor.cpp +++ b/src/Interpreters/ITokenExtractor.cpp @@ -240,4 +240,34 @@ bool SplitTokenExtractor::nextInStringLike(const char * data, size_t length, siz return !bad_token && !token.empty(); } 
+void SplitTokenExtractor::substringToBloomFilter(const char * data, size_t length, BloomFilter & bloom_filter, bool is_prefix, bool is_suffix) const +{ + size_t cur = 0; + size_t token_start = 0; + size_t token_len = 0; + + while (cur < length && nextInString(data, length, &cur, &token_start, &token_len)) + // In order to avoid filter updates with incomplete tokens, + // first token is ignored, unless substring is prefix and + // last token is ignored, unless substring is suffix + if ((token_start > 0 || is_prefix) && (token_start + token_len < length || is_suffix)) + bloom_filter.add(data + token_start, token_len); +} + +void SplitTokenExtractor::substringToGinFilter(const char * data, size_t length, GinFilter & gin_filter, bool is_prefix, bool is_suffix) const +{ + gin_filter.setQueryString(data, length); + + size_t cur = 0; + size_t token_start = 0; + size_t token_len = 0; + + while (cur < length && nextInString(data, length, &cur, &token_start, &token_len)) + // In order to avoid filter updates with incomplete tokens, + // first token is ignored, unless substring is prefix and + // last token is ignored, unless substring is suffix + if ((token_start > 0 || is_prefix) && (token_start + token_len < length || is_suffix)) + gin_filter.addTerm(data + token_start, token_len); +} + } diff --git a/src/Interpreters/ITokenExtractor.h b/src/Interpreters/ITokenExtractor.h index 2423ef12311..76711606d09 100644 --- a/src/Interpreters/ITokenExtractor.h +++ b/src/Interpreters/ITokenExtractor.h @@ -28,8 +28,22 @@ struct ITokenExtractor /// It skips unescaped `%` and `_` and supports escaping symbols, but it is less lightweight. virtual bool nextInStringLike(const char * data, size_t length, size_t * pos, String & out) const = 0; + /// Updates Bloom filter from exact-match string filter value virtual void stringToBloomFilter(const char * data, size_t length, BloomFilter & bloom_filter) const = 0; + /// Updates Bloom filter from substring-match string filter value. 
+ /// An `ITokenExtractor` implementation may decide to skip certain + /// tokens depending on whether the substring is a prefix or a suffix. + virtual void substringToBloomFilter( + const char * data, + size_t length, + BloomFilter & bloom_filter, + bool is_prefix [[maybe_unused]], + bool is_suffix [[maybe_unused]]) const + { + stringToBloomFilter(data, length, bloom_filter); + } + virtual void stringPaddedToBloomFilter(const char * data, size_t length, BloomFilter & bloom_filter) const { stringToBloomFilter(data, length, bloom_filter); @@ -37,8 +51,22 @@ struct ITokenExtractor virtual void stringLikeToBloomFilter(const char * data, size_t length, BloomFilter & bloom_filter) const = 0; + /// Updates GIN filter from exact-match string filter value virtual void stringToGinFilter(const char * data, size_t length, GinFilter & gin_filter) const = 0; + /// Updates GIN filter from substring-match string filter value. + /// An `ITokenExtractor` implementation may decide to skip certain + /// tokens depending on whether the substring is a prefix or a suffix. 
+ virtual void substringToGinFilter( + const char * data, + size_t length, + GinFilter & gin_filter, + bool is_prefix [[maybe_unused]], + bool is_suffix [[maybe_unused]]) const + { + stringToGinFilter(data, length, gin_filter); + } + virtual void stringPaddedToGinFilter(const char * data, size_t length, GinFilter & gin_filter) const { stringToGinFilter(data, length, gin_filter); @@ -148,6 +176,11 @@ struct SplitTokenExtractor final : public ITokenExtractorHelper #include #include -#include +#include namespace DB @@ -23,7 +23,7 @@ BlockIO InterpreterAlterNamedCollectionQuery::execute() return executeDDLQueryOnCluster(query_ptr, current_context, params); } - NamedCollectionUtils::updateFromSQL(query, current_context); + NamedCollectionFactory::instance().updateFromSQL(query); return {}; } diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 2115dc57126..c70a3397f4e 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -175,11 +175,11 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) else throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query"); - if (!getContext()->getSettings().allow_experimental_statistic && ( - command_ast->type == ASTAlterCommand::ADD_STATISTIC || - command_ast->type == ASTAlterCommand::DROP_STATISTIC || - command_ast->type == ASTAlterCommand::MATERIALIZE_STATISTIC)) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Alter table with statistic is now disabled. Turn on allow_experimental_statistic"); + if (!getContext()->getSettings().allow_experimental_statistics && ( + command_ast->type == ASTAlterCommand::ADD_STATISTICS || + command_ast->type == ASTAlterCommand::DROP_STATISTICS || + command_ast->type == ASTAlterCommand::MATERIALIZE_STATISTICS)) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Alter table with statistics is now disabled. 
Turn on allow_experimental_statistics"); } if (typeid_cast(database.get())) @@ -343,19 +343,24 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS required_access.emplace_back(AccessType::ALTER_SAMPLE_BY, database, table); break; } - case ASTAlterCommand::ADD_STATISTIC: + case ASTAlterCommand::ADD_STATISTICS: { - required_access.emplace_back(AccessType::ALTER_ADD_STATISTIC, database, table); + required_access.emplace_back(AccessType::ALTER_ADD_STATISTICS, database, table); break; } - case ASTAlterCommand::DROP_STATISTIC: + case ASTAlterCommand::MODIFY_STATISTICS: { - required_access.emplace_back(AccessType::ALTER_DROP_STATISTIC, database, table); + required_access.emplace_back(AccessType::ALTER_MODIFY_STATISTICS, database, table); break; } - case ASTAlterCommand::MATERIALIZE_STATISTIC: + case ASTAlterCommand::DROP_STATISTICS: { - required_access.emplace_back(AccessType::ALTER_MATERIALIZE_STATISTIC, database, table); + required_access.emplace_back(AccessType::ALTER_DROP_STATISTICS, database, table); + break; + } + case ASTAlterCommand::MATERIALIZE_STATISTICS: + { + required_access.emplace_back(AccessType::ALTER_MATERIALIZE_STATISTICS, database, table); break; } case ASTAlterCommand::ADD_INDEX: diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp index 41e87bb73dd..c71441daa8c 100644 --- a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB @@ -23,7 +23,7 @@ BlockIO InterpreterCreateNamedCollectionQuery::execute() return executeDDLQueryOnCluster(query_ptr, current_context, params); } - NamedCollectionUtils::createFromSQL(query, current_context); + NamedCollectionFactory::instance().createFromSQL(query); return {}; } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp 
b/src/Interpreters/InterpreterCreateQuery.cpp index f2e03ca41bd..a990eb651ce 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -88,6 +88,11 @@ #include #include +namespace CurrentMetrics +{ + extern const Metric AttachedTable; +} + namespace DB { @@ -113,6 +118,8 @@ namespace ErrorCodes extern const int UNKNOWN_STORAGE; extern const int SYNTAX_ERROR; extern const int SUPPORT_IS_DISABLED; + extern const int TOO_MANY_TABLES; + extern const int TOO_MANY_DATABASES; } namespace fs = std::filesystem; @@ -138,6 +145,31 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) throw Exception(ErrorCodes::DATABASE_ALREADY_EXISTS, "Database {} already exists.", database_name); } + auto db_num_limit = getContext()->getGlobalContext()->getServerSettings().max_database_num_to_throw; + if (db_num_limit > 0) + { + size_t db_count = DatabaseCatalog::instance().getDatabases().size(); + std::vector system_databases = { + DatabaseCatalog::TEMPORARY_DATABASE, + DatabaseCatalog::SYSTEM_DATABASE, + DatabaseCatalog::INFORMATION_SCHEMA, + DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE, + DatabaseCatalog::DEFAULT_DATABASE + }; + + for (const auto & system_database : system_databases) + { + if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(system_database)) + db_count--; + } + + if (db_count >= db_num_limit) + throw Exception(ErrorCodes::TOO_MANY_DATABASES, + "Too many databases in the Clickhouse. " + "The limit (setting 'max_database_num_to_throw') is set to {}, current number of databases is {}", + db_num_limit, db_count); + } + /// Will write file with database metadata, if needed. 
String database_name_escaped = escapeForFileName(database_name); fs::path metadata_path = fs::weakly_canonical(getContext()->getPath()); @@ -448,10 +480,10 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) column_declaration->children.push_back(column_declaration->codec); } - if (column.stat) + if (!column.statistics.empty()) { - column_declaration->stat_type = column.stat->ast; - column_declaration->children.push_back(column_declaration->stat_type); + column_declaration->statistics_desc = column.statistics.getAST(); + column_declaration->children.push_back(column_declaration->statistics_desc); } if (column.ttl) @@ -675,11 +707,12 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec); } - if (col_decl.stat_type) + column.statistics.column_name = column.name; /// We assign column name here for better exception error message. + if (col_decl.statistics_desc) { - if (!skip_checks && !context_->getSettingsRef().allow_experimental_statistic) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Create table with statistic is now disabled. Turn on allow_experimental_statistic"); - column.stat = StatisticDescription::getStatisticFromColumnDeclaration(col_decl); + if (!skip_checks && !context_->getSettingsRef().allow_experimental_statistics) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Create table with statistics is now disabled. Turn on allow_experimental_statistics"); + column.statistics = ColumnStatisticsDescription::fromColumnDeclaration(col_decl, column.type); } if (col_decl.ttl) @@ -754,7 +787,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental full-text index feature is not enabled (the setting 'allow_experimental_full_text_index')"); /// ---- /// Temporary check during a transition period. 
Please remove at the end of 2024. - if (index_desc.type == INVERTED_INDEX_NAME && settings.allow_experimental_inverted_index) /// The funny condition is not a mistake, see 02346_fulltext_index_old_name.sql + if (index_desc.type == INVERTED_INDEX_NAME && !settings.allow_experimental_inverted_index) throw Exception(ErrorCodes::ILLEGAL_INDEX, "Please use index type 'full_text' instead of 'inverted'"); /// ---- if (index_desc.type == "annoy" && !settings.allow_experimental_annoy_index) @@ -865,6 +898,8 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti assert(as_database_saved.empty() && as_table_saved.empty()); std::swap(create.as_database, as_database_saved); std::swap(create.as_table, as_table_saved); + if (!as_table_saved.empty()) + create.is_create_empty = false; return properties; } @@ -1076,6 +1111,27 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data } +namespace +{ + +void addTableDependencies(const ASTCreateQuery & create, const ASTPtr & query_ptr, const ContextPtr & context) +{ + QualifiedTableName qualified_name{create.getDatabase(), create.getTable()}; + auto ref_dependencies = getDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); + auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr); + DatabaseCatalog::instance().addDependencies(qualified_name, ref_dependencies, loading_dependencies); +} + +void checkTableCanBeAddedWithNoCyclicDependencies(const ASTCreateQuery & create, const ASTPtr & query_ptr, const ContextPtr & context) +{ + QualifiedTableName qualified_name{create.getDatabase(), create.getTable()}; + auto ref_dependencies = getDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, query_ptr, context->getCurrentDatabase()); + auto loading_dependencies = getLoadingDependenciesFromCreateQuery(context->getGlobalContext(), qualified_name, 
query_ptr); + DatabaseCatalog::instance().checkTableCanBeAddedWithNoCyclicDependencies(qualified_name, ref_dependencies, loading_dependencies); +} + +} + BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) { /// Temporary tables are created out of databases. @@ -1087,11 +1143,14 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) String current_database = getContext()->getCurrentDatabase(); auto database_name = create.database ? create.getDatabase() : current_database; + bool is_secondary_query = getContext()->getZooKeeperMetadataTransaction() && !getContext()->getZooKeeperMetadataTransaction()->isInitialQuery(); + auto mode = getLoadingStrictnessLevel(create.attach, /*force_attach*/ false, /*has_force_restore_data_flag*/ false, is_secondary_query || is_restore_from_backup); + if (!create.sql_security && create.supportSQLSecurity() && !getContext()->getServerSettings().ignore_empty_sql_security_in_create_view_query) create.sql_security = std::make_shared(); if (create.sql_security) - processSQLSecurityOption(getContext(), create.sql_security->as(), create.attach, create.is_materialized_view); + processSQLSecurityOption(getContext(), create.sql_security->as(), create.is_materialized_view, /* skip_check_permissions= */ mode >= LoadingStrictnessLevel::SECONDARY_CREATE); DDLGuardPtr ddl_guard; @@ -1218,9 +1277,6 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!UserDefinedSQLFunctionFactory::instance().empty()) UserDefinedSQLFunctionVisitor::visit(query_ptr); - bool is_secondary_query = getContext()->getZooKeeperMetadataTransaction() && !getContext()->getZooKeeperMetadataTransaction()->isInitialQuery(); - auto mode = getLoadingStrictnessLevel(create.attach, /*force_attach*/ false, /*has_force_restore_data_flag*/ false, is_secondary_query || is_restore_from_backup); - /// Set and retrieve list of columns, indices and constraints. Set table engine if needed. Rewrite query in canonical way. 
TableProperties properties = getTablePropertiesAndNormalizeCreateQuery(create, mode); @@ -1321,11 +1377,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) return {}; /// If table has dependencies - add them to the graph - QualifiedTableName qualified_name{database_name, create.getTable()}; - auto ref_dependencies = getDependenciesFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr); - auto loading_dependencies = getLoadingDependenciesFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr); - DatabaseCatalog::instance().addDependencies(qualified_name, ref_dependencies, loading_dependencies); - + addTableDependencies(create, query_ptr, getContext()); return fillTableIfNeeded(create); } @@ -1477,6 +1529,9 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find UUID mapping for {}, it's a bug", create.uuid); } + /// Before actually creating the table, check if it will lead to cyclic dependencies. + checkTableCanBeAddedWithNoCyclicDependencies(create, query_ptr, getContext()); + StoragePtr res; /// NOTE: CREATE query may be rewritten by Storage creator or table function if (create.as_table_function) @@ -1542,6 +1597,17 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, } } + UInt64 table_num_limit = getContext()->getGlobalContext()->getServerSettings().max_table_num_to_throw; + if (table_num_limit > 0 && create.getDatabase() != DatabaseCatalog::SYSTEM_DATABASE) + { + UInt64 table_count = CurrentMetrics::get(CurrentMetrics::AttachedTable); + if (table_count >= table_num_limit) + throw Exception(ErrorCodes::TOO_MANY_TABLES, + "Too many tables in the Clickhouse. " + "The limit (setting 'max_table_num_to_throw') is set to {}, current number of tables is {}", + table_num_limit, table_count); + } + database->createTable(getContext(), create.getTable(), res, query_ptr); /// Move table data to the proper place. 
Wo do not move data earlier to avoid situations @@ -1577,6 +1643,9 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create, ContextMutablePtr create_context = Context::createCopy(current_context); create_context->setQueryContext(std::const_pointer_cast(current_context)); + /// Before actually creating/replacing the table, check if it will lead to cyclic dependencies. + checkTableCanBeAddedWithNoCyclicDependencies(create, query_ptr, create_context); + auto make_drop_context = [&]() -> ContextMutablePtr { ContextMutablePtr drop_context = Context::createCopy(current_context); @@ -1623,6 +1692,9 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create, assert(done); created = true; + /// If table has dependencies - add them to the graph + addTableDependencies(create, query_ptr, getContext()); + /// Try fill temporary table BlockIO fill_io = fillTableIfNeeded(create); executeTrivialBlockIO(fill_io, getContext()); @@ -1885,7 +1957,7 @@ void InterpreterCreateQuery::addColumnsDescriptionToCreateQueryIfNecessary(ASTCr } } -void InterpreterCreateQuery::processSQLSecurityOption(ContextPtr context_, ASTSQLSecurity & sql_security, bool is_attach, bool is_materialized_view) +void InterpreterCreateQuery::processSQLSecurityOption(ContextPtr context_, ASTSQLSecurity & sql_security, bool is_materialized_view, bool skip_check_permissions) { /// If no SQL security is specified, apply default from default_*_view_sql_security setting. if (!sql_security.type) @@ -1926,7 +1998,7 @@ void InterpreterCreateQuery::processSQLSecurityOption(ContextPtr context_, ASTSQ } /// Checks the permissions for the specified definer user. 
- if (sql_security.definer && !sql_security.is_definer_current_user && !is_attach) + if (sql_security.definer && !sql_security.is_definer_current_user && !skip_check_permissions) { const auto definer_name = sql_security.definer->toString(); @@ -1936,7 +2008,7 @@ void InterpreterCreateQuery::processSQLSecurityOption(ContextPtr context_, ASTSQ context_->checkAccess(AccessType::SET_DEFINER, definer_name); } - if (sql_security.type == SQLSecurityType::NONE && !is_attach) + if (sql_security.type == SQLSecurityType::NONE && !skip_check_permissions) context_->checkAccess(AccessType::ALLOW_SQL_SECURITY_NONE); } diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index be4a10eaf1d..70ef29e6b07 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -82,7 +82,7 @@ public: void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr) const override; /// Check access right, validate definer statement and replace `CURRENT USER` with actual name. 
- static void processSQLSecurityOption(ContextPtr context_, ASTSQLSecurity & sql_security, bool is_attach = false, bool is_materialized_view = false); + static void processSQLSecurityOption(ContextPtr context_, ASTSQLSecurity & sql_security, bool is_materialized_view = false, bool skip_check_permissions = false); private: struct TableProperties diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp index baadc85f443..2edaef1b2f2 100644 --- a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB @@ -23,7 +23,7 @@ BlockIO InterpreterDropNamedCollectionQuery::execute() return executeDDLQueryOnCluster(query_ptr, current_context, params); } - NamedCollectionUtils::removeFromSQL(query, current_context); + NamedCollectionFactory::instance().removeFromSQL(query); return {}; } diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 458be843b59..3a06e1b2301 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -67,8 +67,8 @@ namespace static void visit(ASTSelectQuery & select, ASTPtr & node, Data & data) { - /// we need to read statistic when `allow_statistic_optimize` is enabled. - bool only_analyze = !data.getContext()->getSettings().allow_statistic_optimize; + /// we need to read statistic when `allow_statistics_optimize` is enabled. 
+ bool only_analyze = !data.getContext()->getSettings().allow_statistics_optimize; InterpreterSelectQuery interpreter( node, data.getContext(), SelectQueryOptions(QueryProcessingStage::FetchColumns).analyze(only_analyze).modify()); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 128854e87ba..f396db70d21 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -26,7 +26,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -279,6 +280,8 @@ Chain InterpreterInsertQuery::buildChain( std::atomic_uint64_t * elapsed_counter_ms, bool check_access) { + IInterpreter::checkStorageSupportsTransactionsIfNeeded(table, getContext()); + ProfileEvents::increment(ProfileEvents::InsertQueriesWithSubqueries); ProfileEvents::increment(ProfileEvents::QueriesWithSubqueries); @@ -623,9 +626,20 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); + size_t threads = presink_chains.size(); + + pipeline.resize(1); + + pipeline.addTransform(std::make_shared( + header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + + pipeline.resize(threads); + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - return std::make_shared( + return std::make_shared( in_header, table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); @@ -681,12 +695,19 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); - auto squashing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL); + auto squashing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); chain.addSource(std::move(squashing)); + + auto balancing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(balancing)); } auto context_ptr = getContext(); diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index eeb762b4d7e..32c475d138f 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -127,14 +127,23 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c { StorageID from_table_id{elem.from_database_name, elem.from_table_name}; StorageID to_table_id{elem.to_database_name, elem.to_table_name}; - std::vector ref_dependencies; - std::vector loading_dependencies; + std::vector from_ref_dependencies; + std::vector from_loading_dependencies; + std::vector to_ref_dependencies; + std::vector to_loading_dependencies; - if (!exchange_tables) + if (exchange_tables) { + DatabaseCatalog::instance().checkTablesCanBeExchangedWithNoCyclicDependencies(from_table_id, to_table_id); + std::tie(from_ref_dependencies, from_loading_dependencies) = database_catalog.removeDependencies(from_table_id, false, false); + std::tie(to_ref_dependencies, to_loading_dependencies) = database_catalog.removeDependencies(to_table_id, false, false); + } + else + { + DatabaseCatalog::instance().checkTableCanBeRenamedWithNoCyclicDependencies(from_table_id, to_table_id); bool check_ref_deps = getContext()->getSettingsRef().check_referential_table_dependencies; bool check_loading_deps = 
!check_ref_deps && getContext()->getSettingsRef().check_table_dependencies; - std::tie(ref_dependencies, loading_dependencies) = database_catalog.removeDependencies(from_table_id, check_ref_deps, check_loading_deps); + std::tie(from_ref_dependencies, from_loading_dependencies) = database_catalog.removeDependencies(from_table_id, check_ref_deps, check_loading_deps); } try @@ -147,12 +156,17 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c exchange_tables, rename.dictionary); - DatabaseCatalog::instance().addDependencies(to_table_id, ref_dependencies, loading_dependencies); + DatabaseCatalog::instance().addDependencies(to_table_id, from_ref_dependencies, from_loading_dependencies); + if (!to_ref_dependencies.empty() || !to_loading_dependencies.empty()) + DatabaseCatalog::instance().addDependencies(from_table_id, to_ref_dependencies, to_loading_dependencies); + } catch (...) { /// Restore dependencies if RENAME fails - DatabaseCatalog::instance().addDependencies(from_table_id, ref_dependencies, loading_dependencies); + DatabaseCatalog::instance().addDependencies(from_table_id, from_ref_dependencies, from_loading_dependencies); + if (!to_ref_dependencies.empty() || !to_loading_dependencies.empty()) + DatabaseCatalog::instance().addDependencies(to_table_id, to_ref_dependencies, to_loading_dependencies); throw; } } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index e72cf670f69..90c484636ea 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -175,11 +175,10 @@ FilterDAGInfoPtr generateFilterActions( /// Using separate expression analyzer to prevent any possible alias injection auto syntax_result = TreeRewriter(context).analyzeSelect(query_ast, TreeRewriterResult({}, storage, storage_snapshot)); SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot, {}, false, {}, prepared_sets); - 
filter_info->actions = analyzer.simpleSelectActions(); + filter_info->actions = std::make_unique(std::move(analyzer.simpleSelectActions()->dag)); filter_info->column_name = expr_list->children.at(0)->getColumnName(); filter_info->actions->removeUnusedActions(NameSet{filter_info->column_name}); - filter_info->actions->projectInput(false); for (const auto * node : filter_info->actions->getInputs()) filter_info->actions->getOutputs().push_back(node); @@ -578,7 +577,9 @@ InterpreterSelectQuery::InterpreterSelectQuery( settings.parallel_replicas_count, settings.parallel_replica_offset, std::move(custom_key_ast), - settings.parallel_replicas_custom_key_filter_type, + {settings.parallel_replicas_custom_key_filter_type, + settings.parallel_replicas_custom_key_range_lower, + settings.parallel_replicas_custom_key_range_upper}, storage->getInMemoryMetadataPtr()->columns, context); } @@ -657,7 +658,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( MergeTreeWhereOptimizer where_optimizer{ std::move(column_compressed_sizes), metadata_snapshot, - storage->getConditionEstimatorByPredicate(storage_snapshot, nullptr, context), + storage->getConditionSelectivityEstimatorByPredicate(storage_snapshot, nullptr, context), queried_columns, supported_prewhere_columns, log}; @@ -909,7 +910,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() UInt64 max_rows = maxBlockSizeByLimit(); if (settings.max_rows_to_read) max_rows = max_rows ? std::min(max_rows, settings.max_rows_to_read.value) : settings.max_rows_to_read; - query_info_copy.limit = max_rows; + query_info_copy.trivial_limit = max_rows; /// Apply filters to prewhere and add them to the query_info so we can filter out parts efficiently during row estimation applyFiltersToPrewhereInAnalysis(analysis_copy); @@ -1076,15 +1077,15 @@ Block InterpreterSelectQuery::getSampleBlockImpl() // with this code. See // https://github.com/ClickHouse/ClickHouse/issues/19857 for details. 
if (analysis_result.before_window) - return analysis_result.before_window->getResultColumns(); + return analysis_result.before_window->dag.getResultColumns(); // NOTE: should not handle before_limit_by specially since // WithMergeableState does not process LIMIT BY - return analysis_result.before_order_by->getResultColumns(); + return analysis_result.before_order_by->dag.getResultColumns(); } - Block header = analysis_result.before_aggregation->getResultColumns(); + Block header = analysis_result.before_aggregation->dag.getResultColumns(); Block res; @@ -1122,18 +1123,18 @@ Block InterpreterSelectQuery::getSampleBlockImpl() // It's different from selected_columns, see the comment above for // WithMergeableState stage. if (analysis_result.before_window) - return analysis_result.before_window->getResultColumns(); + return analysis_result.before_window->dag.getResultColumns(); // In case of query on remote shards executed up to // WithMergeableStateAfterAggregation*, they can process LIMIT BY, // since the initiator will not apply LIMIT BY again. if (analysis_result.before_limit_by) - return analysis_result.before_limit_by->getResultColumns(); + return analysis_result.before_limit_by->dag.getResultColumns(); - return analysis_result.before_order_by->getResultColumns(); + return analysis_result.before_order_by->dag.getResultColumns(); } - return analysis_result.final_projection->getResultColumns(); + return analysis_result.final_projection->dag.getResultColumns(); } @@ -1472,6 +1473,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

(source_header); @@ -1634,12 +1638,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

(query_plan.getCurrentDataStream(), expressions.before_array_join); - before_array_join_step->setStepDescription("Before ARRAY JOIN"); - query_plan.addStep(std::move(before_array_join_step)); - } + executeExpression(query_plan, expressions.before_array_join, "Before ARRAY JOIN"); if (expressions.array_join) { @@ -1651,23 +1650,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

- s3 -
- - - - - - - inflight_limit1000000 - priority - fifo0 - fair1 - fifo9 - fifo1 - - - inflight_limit1000000 - priority - fifo0 - fair1 - fifo9 - fifo1 - - - - - /prio/admin - /prio/admin - - - /prio/fair/prod - /prio/fair/prod - - - /prio/fair/dev - /prio/fair/dev - - - /prio/fair/dev - /prio/fair/dev - - -

( - query_plan.getCurrentDataStream(), - expressions.before_join); - before_join_step->setStepDescription("Before JOIN"); - query_plan.addStep(std::move(before_join_step)); - } + executeExpression(query_plan, expressions.before_join, "Before JOIN"); /// Optional step to convert key columns to common supertype. if (expressions.converting_join_columns) - { - QueryPlanStepPtr convert_join_step = std::make_unique( - query_plan.getCurrentDataStream(), - expressions.converting_join_columns); - convert_join_step->setStepDescription("Convert JOIN columns"); - query_plan.addStep(std::move(convert_join_step)); - } + executeExpression(query_plan, expressions.converting_join_columns, "Convert JOIN columns"); if (expressions.hasJoin()) { @@ -2111,7 +2098,6 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis { /// Execute row level filter in prewhere as a part of "move to prewhere" optimization. analysis.prewhere_info = std::make_shared(analysis.filter_info->actions, analysis.filter_info->column_name); - analysis.prewhere_info->prewhere_actions->projectInput(false); analysis.prewhere_info->remove_prewhere_column = analysis.filter_info->do_remove_column; analysis.prewhere_info->need_filter = true; analysis.filter_info = nullptr; @@ -2122,7 +2108,6 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis /// Add row level security actions to prewhere. 
analysis.prewhere_info->row_level_filter = analysis.filter_info->actions; analysis.prewhere_info->row_level_column_name = analysis.filter_info->column_name; - analysis.prewhere_info->row_level_filter->projectInput(false); analysis.filter_info = nullptr; } } @@ -2331,7 +2316,7 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle } if (analysis_result.hasWhere()) { - filter_nodes.push_back(&analysis_result.before_where->findInOutputs(analysis_result.where_column_name)); + filter_nodes.push_back(&analysis_result.before_where->dag.findInOutputs(analysis_result.where_column_name)); } auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes); @@ -2372,49 +2357,6 @@ UInt64 InterpreterSelectQuery::maxBlockSizeByLimit() const return 0; } -/** Storages can rely that filters that for storage will be available for analysis before - * plan is fully constructed and optimized. - * - * StorageMerge common header calculation and prewhere push-down relies on this. - * - * This is similar to Planner::collectFiltersForAnalysis - */ -void collectFiltersForAnalysis( - const ASTPtr & query_ptr, - const ContextPtr & query_context, - const StorageSnapshotPtr & storage_snapshot, - const SelectQueryOptions & options, - SelectQueryInfo & query_info) -{ - auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); - - auto dummy = std::make_shared( - storage_snapshot->storage.getStorageID(), ColumnsDescription(storage_snapshot->getColumns(get_column_options)), storage_snapshot); - - QueryPlan query_plan; - InterpreterSelectQuery(query_ptr, query_context, dummy, dummy->getInMemoryMetadataPtr(), options).buildQueryPlan(query_plan); - - auto optimization_settings = QueryPlanOptimizationSettings::fromContext(query_context); - query_plan.optimize(optimization_settings); - - std::vector nodes_to_process; - nodes_to_process.push_back(query_plan.getRootNode()); - - while (!nodes_to_process.empty()) - { - const auto * 
node_to_process = nodes_to_process.back(); - nodes_to_process.pop_back(); - nodes_to_process.insert(nodes_to_process.end(), node_to_process->children.begin(), node_to_process->children.end()); - - auto * read_from_dummy = typeid_cast(node_to_process->step.get()); - if (!read_from_dummy) - continue; - - query_info.filter_actions_dag = read_from_dummy->getFilterActionsDAG(); - query_info.optimized_prewhere_info = read_from_dummy->getPrewhereInfo(); - } -} - void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan) { auto & query = getSelectQuery(); @@ -2440,7 +2382,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc auto column = ColumnAggregateFunction::create(func); column->insertFrom(place); - Block header = analysis_result.before_aggregation->getResultColumns(); + Block header = analysis_result.before_aggregation->dag.getResultColumns(); size_t arguments_size = desc.argument_names.size(); DataTypes argument_types(arguments_size); for (size_t j = 0; j < arguments_size; ++j) @@ -2503,13 +2445,13 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc if (local_limits.local_limits.size_limits.max_rows != 0) { if (max_block_limited < local_limits.local_limits.size_limits.max_rows) - query_info.limit = max_block_limited; + query_info.trivial_limit = max_block_limited; else if (local_limits.local_limits.size_limits.max_rows < std::numeric_limits::max()) /// Ask to read just enough rows to make the max_rows limit effective (so it has a chance to be triggered). 
- query_info.limit = 1 + local_limits.local_limits.size_limits.max_rows; + query_info.trivial_limit = 1 + local_limits.local_limits.size_limits.max_rows; } else { - query_info.limit = max_block_limited; + query_info.trivial_limit = max_block_limited; } } @@ -2544,10 +2486,6 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc } else if (storage) { - if (shouldMoveToPrewhere() && settings.query_plan_optimize_prewhere && settings.query_plan_enable_optimizations - && typeid_cast(storage.get())) - collectFiltersForAnalysis(query_ptr, context, storage_snapshot, options, query_info); - /// Table. if (max_streams == 0) max_streams = 1; @@ -2599,10 +2537,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc query_info.storage_limits = std::make_shared(storage_limits); query_info.settings_limit_offset_done = options.settings_limit_offset_done; - /// Possible filters: row-security, additional filter, replica filter (before array join), where (after array join) - query_info.has_filters_and_no_array_join_before_filter = row_policy_filter || additional_filter_info - || parallel_replicas_custom_filter_info - || (analysis_result.hasWhere() && !analysis_result.before_where->hasArrayJoin() && !analysis_result.array_join); + storage->read(query_plan, required_columns, storage_snapshot, query_info, context, processing_stage, max_block_size, max_streams); if (context->hasQueryContext() && !options.is_internal) @@ -2644,10 +2579,14 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc } } -void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter) +void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter) { + auto dag = expression->dag.clone(); + if (expression->project_input) + dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + 
auto where_step = std::make_unique( - query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(), remove_filter); + query_plan.getCurrentDataStream(), std::move(dag), getSelectQuery().where()->getColumnName(), remove_filter); where_step->setStepDescription("WHERE"); query_plan.addStep(std::move(where_step)); @@ -2721,11 +2660,9 @@ static GroupingSetsParamsList getAggregatorGroupingSetsParams(const SelectQueryE return result; } -void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info) +void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info) { - auto expression_before_aggregation = std::make_unique(query_plan.getCurrentDataStream(), expression); - expression_before_aggregation->setStepDescription("Before GROUP BY"); - query_plan.addStep(std::move(expression_before_aggregation)); + executeExpression(query_plan, expression, "Before GROUP BY"); AggregateDescriptions aggregates = query_analyzer->aggregates(); const Settings & settings = context->getSettingsRef(); @@ -2816,10 +2753,14 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool } -void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter) +void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter) { + auto dag = expression->dag.clone(); + if (expression->project_input) + dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + auto having_step - = std::make_unique(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), remove_filter); + = std::make_unique(query_plan.getCurrentDataStream(), std::move(dag), 
getSelectQuery().having()->getColumnName(), remove_filter); having_step->setStepDescription("HAVING"); query_plan.addStep(std::move(having_step)); @@ -2827,15 +2768,23 @@ void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const Actions void InterpreterSelectQuery::executeTotalsAndHaving( - QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final) + QueryPlan & query_plan, bool has_having, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter, bool overflow_row, bool final) { + ActionsDAGPtr dag; + if (expression) + { + dag = expression->dag.clone(); + if (expression->project_input) + dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + } + const Settings & settings = context->getSettingsRef(); auto totals_having_step = std::make_unique( query_plan.getCurrentDataStream(), query_analyzer->aggregates(), overflow_row, - expression, + std::move(dag), has_having ? getSelectQuery().having()->getColumnName() : "", remove_filter, settings.totals_mode, @@ -2868,12 +2817,16 @@ void InterpreterSelectQuery::executeRollupOrCube(QueryPlan & query_plan, Modific query_plan.addStep(std::move(step)); } -void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description) +void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, const std::string & description) { if (!expression) return; - auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), expression); + auto dag = expression->dag.clone(); + if (expression->project_input) + dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + + auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(dag)); expression_step->setStepDescription(description); query_plan.addStep(std::move(expression_step)); @@ -3043,11 
+2996,9 @@ void InterpreterSelectQuery::executeMergeSorted(QueryPlan & query_plan, const st } -void InterpreterSelectQuery::executeProjection(QueryPlan & query_plan, const ActionsDAGPtr & expression) +void InterpreterSelectQuery::executeProjection(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression) { - auto projection_step = std::make_unique(query_plan.getCurrentDataStream(), expression); - projection_step->setStepDescription("Projection"); - query_plan.addStep(std::move(projection_step)); + executeExpression(query_plan, expression, "Projection"); } diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index e89a1e5febf..d4ed19d45ea 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -174,13 +174,13 @@ private: /// Different stages of query execution. void executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan); - void executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter); + void executeWhere(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter); void executeAggregation( - QueryPlan & query_plan, const ActionsDAGPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info); + QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info); void executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, bool final, bool has_grouping_sets); - void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final); - void executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter); - static void executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description); + void 
executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter, bool overflow_row, bool final); + void executeHaving(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool remove_filter); + static void executeExpression(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, const std::string & description); /// FIXME should go through ActionsDAG to behave as a proper function void executeWindow(QueryPlan & query_plan); void executeOrder(QueryPlan & query_plan, InputOrderInfoPtr sorting_info); @@ -191,7 +191,7 @@ private: void executeLimitBy(QueryPlan & query_plan); void executeLimit(QueryPlan & query_plan); void executeOffset(QueryPlan & query_plan); - static void executeProjection(QueryPlan & query_plan, const ActionsDAGPtr & expression); + static void executeProjection(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression); void executeDistinct(QueryPlan & query_plan, bool before_order, Names columns, bool pre_distinct); void executeExtremes(QueryPlan & query_plan); void executeSubqueriesInSetsAndJoins(QueryPlan & query_plan); diff --git a/src/Interpreters/JoinSwitcher.cpp b/src/Interpreters/JoinSwitcher.cpp index 5ea347549c1..7f75b0d74b3 100644 --- a/src/Interpreters/JoinSwitcher.cpp +++ b/src/Interpreters/JoinSwitcher.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp index 6ed29cfadcb..596b0e4f96c 100644 --- a/src/Interpreters/MetricLog.cpp +++ b/src/Interpreters/MetricLog.cpp @@ -56,78 +56,32 @@ void MetricLogElement::appendToBlock(MutableColumns & columns) const columns[column_idx++]->insert(current_metrics[i].toUnderType()); } - -void MetricLog::startCollectMetric(size_t collect_interval_milliseconds_) +void MetricLog::stepFunction(const std::chrono::system_clock::time_point current_time) { - collect_interval_milliseconds = 
collect_interval_milliseconds_; - is_shutdown_metric_thread = false; - metric_flush_thread = std::make_unique([this] { metricThreadFunction(); }); -} - - -void MetricLog::stopCollectMetric() -{ - bool old_val = false; - if (!is_shutdown_metric_thread.compare_exchange_strong(old_val, true)) - return; - if (metric_flush_thread) - metric_flush_thread->join(); -} - - -void MetricLog::shutdown() -{ - stopCollectMetric(); - stopFlushThread(); -} - - -void MetricLog::metricThreadFunction() -{ - auto desired_timepoint = std::chrono::system_clock::now(); - + /// Static lazy initialization to avoid polluting the header with implementation details /// For differentiation of ProfileEvents counters. - std::vector prev_profile_events(ProfileEvents::end()); + static std::vector prev_profile_events(ProfileEvents::end()); - while (!is_shutdown_metric_thread) + MetricLogElement elem; + elem.event_time = std::chrono::system_clock::to_time_t(current_time); + elem.event_time_microseconds = timeInMicroseconds(current_time); + + elem.profile_events.resize(ProfileEvents::end()); + for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) { - try - { - const auto current_time = std::chrono::system_clock::now(); - - MetricLogElement elem; - elem.event_time = std::chrono::system_clock::to_time_t(current_time); - elem.event_time_microseconds = timeInMicroseconds(current_time); - - elem.profile_events.resize(ProfileEvents::end()); - for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) - { - const ProfileEvents::Count new_value = ProfileEvents::global_counters[i].load(std::memory_order_relaxed); - auto & old_value = prev_profile_events[i]; - elem.profile_events[i] = new_value - old_value; - old_value = new_value; - } - - elem.current_metrics.resize(CurrentMetrics::end()); - for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) - { - elem.current_metrics[i] = CurrentMetrics::values[i]; - } - - 
this->add(std::move(elem)); - - /// We will record current time into table but align it to regular time intervals to avoid time drift. - /// We may drop some time points if the server is overloaded and recording took too much time. - while (desired_timepoint <= current_time) - desired_timepoint += std::chrono::milliseconds(collect_interval_milliseconds); - - std::this_thread::sleep_until(desired_timepoint); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } + const ProfileEvents::Count new_value = ProfileEvents::global_counters[i].load(std::memory_order_relaxed); + auto & old_value = prev_profile_events[i]; + elem.profile_events[i] = new_value - old_value; + old_value = new_value; } + + elem.current_metrics.resize(CurrentMetrics::end()); + for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) + { + elem.current_metrics[i] = CurrentMetrics::values[i]; + } + + this->add(std::move(elem)); } } diff --git a/src/Interpreters/MetricLog.h b/src/Interpreters/MetricLog.h index 4f1e8fafc11..a6fd3ecfcd3 100644 --- a/src/Interpreters/MetricLog.h +++ b/src/Interpreters/MetricLog.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -9,7 +10,6 @@ #include #include -#include #include @@ -33,26 +33,12 @@ struct MetricLogElement void appendToBlock(MutableColumns & columns) const; }; - -class MetricLog : public SystemLog +class MetricLog : public PeriodicLog { - using SystemLog::SystemLog; + using PeriodicLog::PeriodicLog; -public: - void shutdown() override; - - /// Launches a background thread to collect metrics with interval - void startCollectMetric(size_t collect_interval_milliseconds_); - - /// Stop background thread. Call before shutdown. 
- void stopCollectMetric(); - -private: - void metricThreadFunction(); - - std::unique_ptr metric_flush_thread; - size_t collect_interval_milliseconds; - std::atomic is_shutdown_metric_thread{false}; +protected: + void stepFunction(TimePoint current_time) override; }; } diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 4f6c1c5f18b..6d3a4f30b34 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -55,7 +55,7 @@ namespace ErrorCodes extern const int CANNOT_UPDATE_COLUMN; extern const int UNEXPECTED_EXPRESSION; extern const int THERE_IS_NO_COLUMN; - extern const int ILLEGAL_STATISTIC; + extern const int ILLEGAL_STATISTICS; } @@ -781,7 +781,7 @@ void MutationsInterpreter::prepare(bool dry_run) } else if (command.type == MutationCommand::MATERIALIZE_INDEX) { - mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); + mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTICS_PROJECTION); auto it = std::find_if( std::cbegin(indices_desc), std::end(indices_desc), [&](const IndexDescription & index) @@ -801,20 +801,20 @@ void MutationsInterpreter::prepare(bool dry_run) materialized_indices.emplace(command.index_name); } } - else if (command.type == MutationCommand::MATERIALIZE_STATISTIC) + else if (command.type == MutationCommand::MATERIALIZE_STATISTICS) { - mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); - for (const auto & stat_column_name: command.statistic_columns) + mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTICS_PROJECTION); + for (const auto & stat_column_name: command.statistics_columns) { - if (!columns_desc.has(stat_column_name) || !columns_desc.get(stat_column_name).stat) - throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Unknown statistic column: {}", stat_column_name); - dependencies.emplace(stat_column_name, ColumnDependency::STATISTIC); + if (!columns_desc.has(stat_column_name) || 
columns_desc.get(stat_column_name).statistics.empty()) + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Unknown statistics column: {}", stat_column_name); + dependencies.emplace(stat_column_name, ColumnDependency::STATISTICS); materialized_statistics.emplace(stat_column_name); } } else if (command.type == MutationCommand::MATERIALIZE_PROJECTION) { - mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); + mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTICS_PROJECTION); const auto & projection = projections_desc.get(command.projection_name); if (!source.hasProjection(projection.name) || source.hasBrokenProjection(projection.name)) { @@ -825,18 +825,18 @@ void MutationsInterpreter::prepare(bool dry_run) } else if (command.type == MutationCommand::DROP_INDEX) { - mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); + mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTICS_PROJECTION); materialized_indices.erase(command.index_name); } - else if (command.type == MutationCommand::DROP_STATISTIC) + else if (command.type == MutationCommand::DROP_STATISTICS) { - mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); - for (const auto & stat_column_name: command.statistic_columns) + mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTICS_PROJECTION); + for (const auto & stat_column_name: command.statistics_columns) materialized_statistics.erase(stat_column_name); } else if (command.type == MutationCommand::DROP_PROJECTION) { - mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION); + mutation_kind.set(MutationKind::MUTATE_INDEX_STATISTICS_PROJECTION); materialized_projections.erase(command.projection_name); } else if (command.type == MutationCommand::MATERIALIZE_TTL) @@ -888,7 +888,7 @@ void MutationsInterpreter::prepare(bool dry_run) { if (dependency.kind == ColumnDependency::SKIP_INDEX || dependency.kind == ColumnDependency::PROJECTION - || dependency.kind == ColumnDependency::STATISTIC) + || dependency.kind == 
ColumnDependency::STATISTICS) dependencies.insert(dependency); } } @@ -1137,9 +1137,9 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s for (const auto & kv : stage.column_to_updated) { auto column_name = kv.second->getColumnName(); - const auto & dag_node = actions->findInOutputs(column_name); - const auto & alias = actions->addAlias(dag_node, kv.first); - actions->addOrReplaceInOutputs(alias); + const auto & dag_node = actions->dag.findInOutputs(column_name); + const auto & alias = actions->dag.addAlias(dag_node, kv.first); + actions->dag.addOrReplaceInOutputs(alias); } } @@ -1202,7 +1202,7 @@ void MutationsInterpreter::Source::read( { ActionsDAG::NodeRawConstPtrs nodes(num_filters); for (size_t i = 0; i < num_filters; ++i) - nodes[i] = &steps[i]->actions()->findInOutputs(names[i]); + nodes[i] = &steps[i]->actions()->dag.findInOutputs(names[i]); filter = ActionsDAG::buildFilterActionsDAG(nodes); } @@ -1273,18 +1273,24 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v for (size_t i = 0; i < stage.expressions_chain.steps.size(); ++i) { const auto & step = stage.expressions_chain.steps[i]; - if (step->actions()->hasArrayJoin()) + if (step->actions()->dag.hasArrayJoin()) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "arrayJoin is not allowed in mutations"); if (i < stage.filter_column_names.size()) { + auto dag = step->actions()->dag.clone(); + if (step->actions()->project_input) + dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); /// Execute DELETEs. - plan.addStep(std::make_unique(plan.getCurrentDataStream(), step->actions(), stage.filter_column_names[i], false)); + plan.addStep(std::make_unique(plan.getCurrentDataStream(), dag, stage.filter_column_names[i], false)); } else { + auto dag = step->actions()->dag.clone(); + if (step->actions()->project_input) + dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); /// Execute UPDATE or final projection. 
- plan.addStep(std::make_unique(plan.getCurrentDataStream(), step->actions())); + plan.addStep(std::make_unique(plan.getCurrentDataStream(), dag)); } } @@ -1360,7 +1366,7 @@ QueryPipelineBuilder MutationsInterpreter::execute() Block MutationsInterpreter::getUpdatedHeader() const { // If it's an index/projection materialization, we don't write any data columns, thus empty header is used - return mutation_kind.mutation_kind == MutationKind::MUTATE_INDEX_STATISTIC_PROJECTION ? Block{} : *updated_header; + return mutation_kind.mutation_kind == MutationKind::MUTATE_INDEX_STATISTICS_PROJECTION ? Block{} : *updated_header; } const ColumnDependencies & MutationsInterpreter::getColumnDependencies() const diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 2d01c7154c8..6aaa233cda3 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -102,7 +102,7 @@ public: enum MutationKindEnum { MUTATE_UNKNOWN, - MUTATE_INDEX_STATISTIC_PROJECTION, + MUTATE_INDEX_STATISTICS_PROJECTION, MUTATE_OTHER, } mutation_kind = MUTATE_UNKNOWN; diff --git a/src/Interpreters/S3QueueLog.cpp b/src/Interpreters/ObjectStorageQueueLog.cpp similarity index 80% rename from src/Interpreters/S3QueueLog.cpp rename to src/Interpreters/ObjectStorageQueueLog.cpp index ba990a8ac25..c841984fd08 100644 --- a/src/Interpreters/S3QueueLog.cpp +++ b/src/Interpreters/ObjectStorageQueueLog.cpp @@ -8,19 +8,19 @@ #include #include #include -#include +#include namespace DB { -ColumnsDescription S3QueueLogElement::getColumnsDescription() +ColumnsDescription ObjectStorageQueueLogElement::getColumnsDescription() { auto status_datatype = std::make_shared( DataTypeEnum8::Values { - {"Processed", static_cast(S3QueueLogElement::S3QueueStatus::Processed)}, - {"Failed", static_cast(S3QueueLogElement::S3QueueStatus::Failed)}, + {"Processed", static_cast(ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Processed)}, + {"Failed", 
static_cast(ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Failed)}, }); return ColumnsDescription @@ -36,12 +36,11 @@ ColumnsDescription S3QueueLogElement::getColumnsDescription() {"status", status_datatype, "Status of the processing file"}, {"processing_start_time", std::make_shared(std::make_shared()), "Time of the start of processing the file"}, {"processing_end_time", std::make_shared(std::make_shared()), "Time of the end of processing the file"}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected while loading this file"}, {"exception", std::make_shared(), "Exception message if happened"}, }; } -void S3QueueLogElement::appendToBlock(MutableColumns & columns) const +void ObjectStorageQueueLogElement::appendToBlock(MutableColumns & columns) const { size_t i = 0; columns[i++]->insert(getFQDNOrHostName()); @@ -64,8 +63,6 @@ void S3QueueLogElement::appendToBlock(MutableColumns & columns) const else columns[i++]->insertDefault(); - ProfileEvents::dumpToMapColumn(counters_snapshot, columns[i++].get(), true); - columns[i++]->insert(exception); } diff --git a/src/Interpreters/S3QueueLog.h b/src/Interpreters/ObjectStorageQueueLog.h similarity index 68% rename from src/Interpreters/S3QueueLog.h rename to src/Interpreters/ObjectStorageQueueLog.h index 19e69c39247..669238d8dbb 100644 --- a/src/Interpreters/S3QueueLog.h +++ b/src/Interpreters/ObjectStorageQueueLog.h @@ -9,7 +9,7 @@ namespace DB { -struct S3QueueLogElement +struct ObjectStorageQueueLogElement { time_t event_time{}; @@ -20,18 +20,17 @@ struct S3QueueLogElement std::string file_name; size_t rows_processed = 0; - enum class S3QueueStatus : uint8_t + enum class ObjectStorageQueueStatus : uint8_t { Processed, Failed, }; - S3QueueStatus status; - ProfileEvents::Counters::Snapshot counters_snapshot; + ObjectStorageQueueStatus status; time_t processing_start_time; time_t processing_end_time; std::string exception; - static std::string name() { return 
"S3QueueLog"; } + static std::string name() { return "ObjectStorageQueueLog"; } static ColumnsDescription getColumnsDescription(); static NamesAndAliases getNamesAndAliases() { return {}; } @@ -39,9 +38,9 @@ struct S3QueueLogElement void appendToBlock(MutableColumns & columns) const; }; -class S3QueueLog : public SystemLog +class ObjectStorageQueueLog : public SystemLog { - using SystemLog::SystemLog; + using SystemLog::SystemLog; }; } diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp new file mode 100644 index 00000000000..9d2891e11eb --- /dev/null +++ b/src/Interpreters/PeriodicLog.cpp @@ -0,0 +1,62 @@ +#include +#include +#include + +namespace DB +{ + +template +void PeriodicLog::startCollect(size_t collect_interval_milliseconds_) +{ + collect_interval_milliseconds = collect_interval_milliseconds_; + is_shutdown_metric_thread = false; + flush_thread = std::make_unique([this] { threadFunction(); }); +} + +template +void PeriodicLog::stopCollect() +{ + bool old_val = false; + if (!is_shutdown_metric_thread.compare_exchange_strong(old_val, true)) + return; + if (flush_thread) + flush_thread->join(); +} + +template +void PeriodicLog::shutdown() +{ + stopCollect(); + this->stopFlushThread(); +} + +template +void PeriodicLog::threadFunction() +{ + auto desired_timepoint = std::chrono::system_clock::now(); + while (!is_shutdown_metric_thread) + { + try + { + const auto current_time = std::chrono::system_clock::now(); + + stepFunction(current_time); + + /// We will record current time into table but align it to regular time intervals to avoid time drift. + /// We may drop some time points if the server is overloaded and recording took too much time. + while (desired_timepoint <= current_time) + desired_timepoint += std::chrono::milliseconds(collect_interval_milliseconds); + + std::this_thread::sleep_until(desired_timepoint); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } +} + +#define INSTANTIATE_SYSTEM_LOG(ELEMENT) template class PeriodicLog; +SYSTEM_PERIODIC_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG) + +} diff --git a/src/Interpreters/PeriodicLog.h b/src/Interpreters/PeriodicLog.h new file mode 100644 index 00000000000..08c3f7eb23f --- /dev/null +++ b/src/Interpreters/PeriodicLog.h @@ -0,0 +1,43 @@ +#pragma once + +#include +#include + +#include +#include + +#define SYSTEM_PERIODIC_LOG_ELEMENTS(M) \ + M(ErrorLogElement) \ + M(MetricLogElement) + +namespace DB +{ + +template +class PeriodicLog : public SystemLog +{ + using SystemLog::SystemLog; + +public: + using TimePoint = std::chrono::system_clock::time_point; + + /// Launches a background thread to collect metrics with interval + void startCollect(size_t collect_interval_milliseconds_); + + /// Stop background thread + void stopCollect(); + + void shutdown() final; + +protected: + virtual void stepFunction(TimePoint current_time) = 0; + +private: + void threadFunction(); + + std::unique_ptr flush_thread; + size_t collect_interval_milliseconds; + std::atomic is_shutdown_metric_thread{false}; +}; + +} diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index 92f8ddae141..e63a2ae31aa 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -136,6 +136,9 @@ ColumnsDescription QueryLogElement::getColumnsDescription() {"used_row_policies", array_low_cardinality_string, "The list of row policies names that were used during query execution."}, + {"used_privileges", array_low_cardinality_string, "Privileges which were successfully checked during query execution."}, + {"missing_privileges", array_low_cardinality_string, "Privileges that are missing during query execution."}, + {"transaction_id", getTransactionIDDataType(), "The identifier of the transaction in scope of which this query was executed."}, {"query_cache_usage", std::move(query_cache_usage_datatype), "Usage of the query cache 
during query execution. Values: 'Unknown' = Status unknown, 'None' = The query result was neither written into nor read from the query cache, 'Write' = The query result was written into the query cache, 'Read' = The query result was read from the query cache."}, @@ -267,6 +270,8 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const auto & column_storage_factory_objects = typeid_cast(*columns[i++]); auto & column_table_function_factory_objects = typeid_cast(*columns[i++]); auto & column_row_policies_names = typeid_cast(*columns[i++]); + auto & column_used_privileges = typeid_cast(*columns[i++]); + auto & column_missing_privileges = typeid_cast(*columns[i++]); auto fill_column = [](const auto & data, ColumnArray & column) { @@ -290,6 +295,8 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const fill_column(used_storages, column_storage_factory_objects); fill_column(used_table_functions, column_table_function_factory_objects); fill_column(used_row_policies, column_row_policies_names); + fill_column(used_privileges, column_used_privileges); + fill_column(missing_privileges, column_missing_privileges); } columns[i++]->insert(Tuple{tid.start_csn, tid.local_tid, tid.host_id}); diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index 5072d220160..bbaa7179757 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -81,6 +81,8 @@ struct QueryLogElement std::unordered_set used_storages; std::unordered_set used_table_functions; std::set used_row_policies; + std::unordered_set used_privileges; + std::unordered_set missing_privileges; Int32 exception_code{}; // because ErrorCodes are int String exception; diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 315202cc01d..872a9f864df 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -210,27 +210,47 @@ void 
ServerAsynchronousMetrics::updateImpl(TimePoint update_time, TimePoint curr auto total = disk->getTotalSpace(); /// Some disks don't support information about the space. - if (!total) - continue; - - auto available = disk->getAvailableSpace(); - auto unreserved = disk->getUnreservedSpace(); - - new_values[fmt::format("DiskTotal_{}", name)] = { *total, - "The total size in bytes of the disk (virtual filesystem). Remote filesystems may not provide this information." }; - - if (available) + if (total) { - new_values[fmt::format("DiskUsed_{}", name)] = { *total - *available, - "Used bytes on the disk (virtual filesystem). Remote filesystems not always provide this information." }; + auto available = disk->getAvailableSpace(); + auto unreserved = disk->getUnreservedSpace(); - new_values[fmt::format("DiskAvailable_{}", name)] = { *available, - "Available bytes on the disk (virtual filesystem). Remote filesystems may not provide this information." }; + new_values[fmt::format("DiskTotal_{}", name)] = { *total, + "The total size in bytes of the disk (virtual filesystem). Remote filesystems may not provide this information." }; + + if (available) + { + new_values[fmt::format("DiskUsed_{}", name)] = { *total - *available, + "Used bytes on the disk (virtual filesystem). Remote filesystems not always provide this information." }; + + new_values[fmt::format("DiskAvailable_{}", name)] = { *available, + "Available bytes on the disk (virtual filesystem). Remote filesystems may not provide this information." }; + } + + if (unreserved) + new_values[fmt::format("DiskUnreserved_{}", name)] = { *unreserved, + "Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems may not provide this information." }; } - if (unreserved) - new_values[fmt::format("DiskUnreserved_{}", name)] = { *unreserved, - "Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. 
Remote filesystems may not provide this information." }; +#if USE_AWS_S3 + if (auto s3_client = disk->tryGetS3StorageClient()) + { + if (auto put_throttler = s3_client->getPutRequestThrottler()) + { + new_values[fmt::format("DiskPutObjectThrottlerRPS_{}", name)] = { put_throttler->getMaxSpeed(), + "PutObject Request throttling limit on the disk in requests per second (virtual filesystem). Local filesystems may not provide this information." }; + new_values[fmt::format("DiskPutObjectThrottlerAvailable_{}", name)] = { put_throttler->getAvailable(), + "Number of PutObject requests that can be currently issued without hitting throttling limit on the disk (virtual filesystem). Local filesystems may not provide this information." }; + } + if (auto get_throttler = s3_client->getGetRequestThrottler()) + { + new_values[fmt::format("DiskGetObjectThrottlerRPS_{}", name)] = { get_throttler->getMaxSpeed(), + "GetObject Request throttling limit on the disk in requests per second (virtual filesystem). Local filesystems may not provide this information." }; + new_values[fmt::format("DiskGetObjectThrottlerAvailable_{}", name)] = { get_throttler->getAvailable(), + "Number of GetObject requests that can be currently issued without hitting throttling limit on the disk (virtual filesystem). Local filesystems may not provide this information." 
}; + } + } +#endif } } diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 396562189e0..bb8c415602f 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -532,7 +532,7 @@ ContextMutablePtr Session::makeSessionContext() session_context->checkSettingsConstraints(settings_from_auth_server, SettingSource::QUERY); session_context->applySettingsChanges(settings_from_auth_server); - recordLoginSucess(session_context); + recordLoginSuccess(session_context); return session_context; } @@ -596,7 +596,7 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: { session_name_ }, max_sessions_for_user); - recordLoginSucess(session_context); + recordLoginSuccess(session_context); return session_context; } @@ -672,13 +672,13 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t user = query_context->getUser(); /// Interserver does not create session context - recordLoginSucess(query_context); + recordLoginSuccess(query_context); return query_context; } -void Session::recordLoginSucess(ContextPtr login_context) const +void Session::recordLoginSuccess(ContextPtr login_context) const { if (notified_session_log_about_login) return; @@ -694,7 +694,7 @@ void Session::recordLoginSucess(ContextPtr login_context) const session_log->addLoginSuccess(auth_id, named_session ? 
named_session->key.second : "", settings, - access, + access->getAccess(), getClientInfo(), user); } diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 14f6f806acd..fc41c78e666 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -102,8 +102,7 @@ public: private: std::shared_ptr getSessionLog() const; ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const; - void recordLoginSucess(ContextPtr login_context) const; - + void recordLoginSuccess(ContextPtr login_context) const; mutable bool notified_session_log_about_login = false; const UUID auth_id; diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index adb94cae0c2..0615a2a1d62 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -86,6 +86,7 @@ ColumnsDescription SessionLogElement::getColumnsDescription() AUTH_TYPE_NAME_AND_VALUE(AuthType::SHA256_PASSWORD), AUTH_TYPE_NAME_AND_VALUE(AuthType::DOUBLE_SHA1_PASSWORD), AUTH_TYPE_NAME_AND_VALUE(AuthType::LDAP), + AUTH_TYPE_NAME_AND_VALUE(AuthType::JWT), AUTH_TYPE_NAME_AND_VALUE(AuthType::KERBEROS), AUTH_TYPE_NAME_AND_VALUE(AuthType::SSH_KEY), AUTH_TYPE_NAME_AND_VALUE(AuthType::SSL_CERTIFICATE), @@ -93,7 +94,7 @@ ColumnsDescription SessionLogElement::getColumnsDescription() AUTH_TYPE_NAME_AND_VALUE(AuthType::HTTP), }); #undef AUTH_TYPE_NAME_AND_VALUE - static_assert(static_cast(AuthenticationType::MAX) == 10); + static_assert(static_cast(AuthenticationType::MAX) == 11); auto interface_type_column = std::make_shared( DataTypeEnum8::Values @@ -214,7 +215,7 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, const ClientInfo & client_info, const UserPtr & login_user) { - DB::SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS); + SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS); log_entry.client_info = client_info; if (login_user) diff --git a/src/Interpreters/Squashing.cpp 
b/src/Interpreters/Squashing.cpp new file mode 100644 index 00000000000..f8b6a6542cc --- /dev/null +++ b/src/Interpreters/Squashing.cpp @@ -0,0 +1,159 @@ +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) + : header(header_) + , min_block_size_rows(min_block_size_rows_) + , min_block_size_bytes(min_block_size_bytes_) +{ +} + +Chunk Squashing::flush() +{ + return convertToChunk(std::move(chunks_to_merge_vec)); +} + +Chunk Squashing::squash(Chunk && input_chunk) +{ + if (!input_chunk.hasChunkInfo()) + return Chunk(); + + const auto *info = getInfoFromChunk(input_chunk); + return squash(info->chunks); +} + +Chunk Squashing::add(Chunk && input_chunk) +{ + if (!input_chunk) + return {}; + + /// Just read block is already enough. + if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes())) + { + /// If no accumulated data, return just read block. + if (chunks_to_merge_vec.empty()) + { + chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + return res_chunk; + } + + /// Return accumulated data (maybe it has small size) and place new block to accumulated data. + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); + return res_chunk; + } + + /// Accumulated block is already enough. + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + { + /// Return accumulated data and place new block to accumulated data. 
+ Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); + return res_chunk; + } + + /// Pushing data into accumulating vector + expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); + + /// If accumulated data is big enough, we send it + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + { + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + changeCurrentSize(0, 0); + chunks_to_merge_vec.clear(); + return res_chunk; + } + return {}; +} + +Chunk Squashing::convertToChunk(std::vector && chunks) const +{ + if (chunks.empty()) + return {}; + + auto info = std::make_shared(); + info->chunks = std::move(chunks); + + chunks.clear(); + + return Chunk(header.cloneEmptyColumns(), 0, info); +} + +Chunk Squashing::squash(std::vector & input_chunks) +{ + Chunk accumulated_chunk; + std::vector mutable_columns = {}; + size_t rows = 0; + for (const Chunk & chunk : input_chunks) + rows += chunk.getNumRows(); + + { + auto & first_chunk = input_chunks[0]; + Columns columns = first_chunk.detachColumns(); + for (auto & column : columns) + { + mutable_columns.push_back(IColumn::mutate(std::move(column))); + mutable_columns.back()->reserve(rows); + } + } + + for (size_t i = 1; i < input_chunks.size(); ++i) // We've already processed the first chunk above + { + Columns columns = input_chunks[i].detachColumns(); + for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) + { + const auto source_column = columns[j]; + + mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); + } + } + accumulated_chunk.setColumns(std::move(mutable_columns), rows); + return accumulated_chunk; +} + +const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk) +{ + const auto& info = chunk.getChunkInfo(); 
+ const auto * agg_info = typeid_cast(info.get()); + + if (!agg_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); + + return agg_info; +} + +void Squashing::expandCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows += rows; + accumulated_size.bytes += bytes; +} + +void Squashing::changeCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows = rows; + accumulated_size.bytes = bytes; +} + +bool Squashing::isEnoughSize(size_t rows, size_t bytes) const +{ + return (!min_block_size_rows && !min_block_size_bytes) + || (min_block_size_rows && rows >= min_block_size_rows) + || (min_block_size_bytes && bytes >= min_block_size_bytes); +} +} diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h new file mode 100644 index 00000000000..d76cca60e41 --- /dev/null +++ b/src/Interpreters/Squashing.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +struct ChunksToSquash : public ChunkInfo +{ + mutable std::vector chunks = {}; +}; + +/** Merging consecutive passed blocks to specified minimum size. + * + * (But if one of input blocks has already at least specified size, + * then don't merge it with neighbours, even if neighbours are small.) + * + * Used to prepare blocks to adequate size for INSERT queries, + * because such storages as Memory, StripeLog, Log, TinyLog... + * store or compress data in blocks exactly as passed to it, + * and blocks of small size are not efficient. + * + * Order of data is kept. 
+ */ + +class Squashing +{ +public: + explicit Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); + Squashing(Squashing && other) = default; + + Chunk add(Chunk && input_chunk); + static Chunk squash(Chunk && input_chunk); + Chunk flush(); + + bool isDataLeft() + { + return !chunks_to_merge_vec.empty(); + } + + Block header; +private: + struct CurrentSize + { + size_t rows = 0; + size_t bytes = 0; + }; + + std::vector chunks_to_merge_vec = {}; + size_t min_block_size_rows; + size_t min_block_size_bytes; + + CurrentSize accumulated_size; + + static const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); + + static Chunk squash(std::vector & input_chunks); + + void expandCurrentSize(size_t rows, size_t bytes); + void changeCurrentSize(size_t rows, size_t bytes); + bool isEnoughSize(size_t rows, size_t bytes) const; + + Chunk convertToChunk(std::vector && chunks) const; +}; + +} diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp deleted file mode 100644 index 41f024df7a7..00000000000 --- a/src/Interpreters/SquashingTransform.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; - extern const int LOGICAL_ERROR; -} - -SquashingTransform::SquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) - , min_block_size_bytes(min_block_size_bytes_) -{ -} - -Block SquashingTransform::add(Block && input_block) -{ - return addImpl(std::move(input_block)); -} - -Block SquashingTransform::add(const Block & input_block) -{ - return addImpl(input_block); -} - -/* - * To minimize copying, accept two types of argument: const reference for output - * stream, and rvalue reference for input stream, and decide whether to copy - * inside this function. This allows us not to copy Block unless we absolutely - * have to. 
- */ -template -Block SquashingTransform::addImpl(ReferenceType input_block) -{ - /// End of input stream. - if (!input_block) - { - Block to_return; - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Just read block is already enough. - if (isEnoughSize(input_block)) - { - /// If no accumulated data, return just read block. - if (!accumulated_block) - { - return std::move(input_block); - } - - /// Return accumulated data (maybe it has small size) and place new block to accumulated data. - Block to_return = std::move(input_block); - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Accumulated block is already enough. - if (isEnoughSize(accumulated_block)) - { - /// Return accumulated data and place new block to accumulated data. - Block to_return = std::move(input_block); - std::swap(to_return, accumulated_block); - return to_return; - } - - append(std::move(input_block)); - if (isEnoughSize(accumulated_block)) - { - Block to_return; - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Squashed block is not ready. - return {}; -} - - -template -void SquashingTransform::append(ReferenceType input_block) -{ - if (!accumulated_block) - { - accumulated_block = std::move(input_block); - return; - } - - assert(blocksHaveEqualStructure(input_block, accumulated_block)); - - try - { - for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) - { - const auto source_column = input_block.getByPosition(i).column; - - auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); - mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); - accumulated_block.getByPosition(i).column = std::move(mutable_column); - } - } - catch (...) - { - /// add() may be called again even after a previous add() threw an exception. - /// Keep accumulated_block in a valid state. 
- /// Seems ok to discard accumulated data because we're throwing an exception, which the caller will - /// hopefully interpret to mean "this block and all *previous* blocks are potentially lost". - accumulated_block.clear(); - throw; - } -} - - -bool SquashingTransform::isEnoughSize(const Block & block) -{ - size_t rows = 0; - size_t bytes = 0; - - for (const auto & [column, type, name] : block) - { - if (!column) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid column in block."); - - if (!rows) - rows = column->size(); - else if (rows != column->size()) - throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Sizes of columns doesn't match"); - - bytes += column->byteSize(); - } - - return isEnoughSize(rows, bytes); -} - - -bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const -{ - return (!min_block_size_rows && !min_block_size_bytes) - || (min_block_size_rows && rows >= min_block_size_rows) - || (min_block_size_bytes && bytes >= min_block_size_bytes); -} - -} diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h deleted file mode 100644 index b04d012bcd1..00000000000 --- a/src/Interpreters/SquashingTransform.h +++ /dev/null @@ -1,50 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ - - -/** Merging consecutive passed blocks to specified minimum size. - * - * (But if one of input blocks has already at least specified size, - * then don't merge it with neighbours, even if neighbours are small.) - * - * Used to prepare blocks to adequate size for INSERT queries, - * because such storages as Memory, StripeLog, Log, TinyLog... - * store or compress data in blocks exactly as passed to it, - * and blocks of small size are not efficient. - * - * Order of data is kept. - */ -class SquashingTransform -{ -public: - /// Conditions on rows and bytes are OR-ed. If one of them is zero, then corresponding condition is ignored. 
- SquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_); - - /** Add next block and possibly returns squashed block. - * At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. - */ - Block add(Block && block); - Block add(const Block & block); - -private: - size_t min_block_size_rows; - size_t min_block_size_bytes; - - Block accumulated_block; - - template - Block addImpl(ReferenceType block); - - template - void append(ReferenceType block); - - bool isEnoughSize(const Block & block); - bool isEnoughSize(size_t rows, size_t bytes) const; -}; - -} diff --git a/src/Interpreters/StorageID.h b/src/Interpreters/StorageID.h index 96e3cefe00c..f9afbc7b98d 100644 --- a/src/Interpreters/StorageID.h +++ b/src/Interpreters/StorageID.h @@ -1,7 +1,6 @@ #pragma once #include #include -#include #include #include #include @@ -136,7 +135,7 @@ namespace fmt } template - auto format(const DB::StorageID & storage_id, FormatContext & ctx) + auto format(const DB::StorageID & storage_id, FormatContext & ctx) const { return fmt::format_to(ctx.out(), "{}", storage_id.getNameForLogs()); } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 3af8761ff8e..557065b23ff 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -24,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -116,6 +117,7 @@ namespace { constexpr size_t DEFAULT_METRIC_LOG_COLLECT_INTERVAL_MILLISECONDS = 1000; +constexpr size_t DEFAULT_ERROR_LOG_COLLECT_INTERVAL_MILLISECONDS = 1000; /// Creates a system log with MergeTree engine using parameters from config template @@ -286,6 +288,7 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf crash_log = createSystemLog(global_context, "system", "crash_log", config, "crash_log", "Contains 
information about stack traces for fatal errors. The table does not exist in the database by default, it is created only when fatal errors occur."); text_log = createSystemLog(global_context, "system", "text_log", config, "text_log", "Contains logging entries which are normally written to a log file or to stdout."); metric_log = createSystemLog(global_context, "system", "metric_log", config, "metric_log", "Contains history of metrics values from tables system.metrics and system.events, periodically flushed to disk."); + error_log = createSystemLog(global_context, "system", "error_log", config, "error_log", "Contains history of error values from table system.errors, periodically flushed to disk."); filesystem_cache_log = createSystemLog(global_context, "system", "filesystem_cache_log", config, "filesystem_cache_log", "Contains a history of all events occurred with filesystem cache for objects on a remote filesystem."); filesystem_read_prefetches_log = createSystemLog( global_context, "system", "filesystem_read_prefetches_log", config, "filesystem_read_prefetches_log", "Contains a history of all prefetches done during reading from MergeTables backed by a remote filesystem."); @@ -303,7 +306,8 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf processors_profile_log = createSystemLog(global_context, "system", "processors_profile_log", config, "processors_profile_log", "Contains profiling information on processors level (building blocks for a pipeline for query execution."); asynchronous_insert_log = createSystemLog(global_context, "system", "asynchronous_insert_log", config, "asynchronous_insert_log", "Contains a history for all asynchronous inserts executed on current server."); backup_log = createSystemLog(global_context, "system", "backup_log", config, "backup_log", "Contains logging entries with the information about BACKUP and RESTORE operations."); - s3_queue_log = createSystemLog(global_context, "system", "s3queue_log", config, 
"s3queue_log", "Contains logging entries with the information files processes by S3Queue engine."); + s3_queue_log = createSystemLog(global_context, "system", "s3queue_log", config, "s3queue_log", "Contains logging entries with the information files processes by S3Queue engine."); + azure_queue_log = createSystemLog(global_context, "system", "azure_queue_log", config, "azure_queue_log", "Contains logging entries with the information files processes by S3Queue engine."); blob_storage_log = createSystemLog(global_context, "system", "blob_storage_log", config, "blob_storage_log", "Contains logging entries with information about various blob storage operations such as uploads and deletes."); if (query_log) @@ -320,6 +324,8 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf logs.emplace_back(text_log.get()); if (metric_log) logs.emplace_back(metric_log.get()); + if (error_log) + logs.emplace_back(error_log.get()); if (asynchronous_metric_log) logs.emplace_back(asynchronous_metric_log.get()); if (opentelemetry_span_log) @@ -366,7 +372,14 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf { size_t collect_interval_milliseconds = config.getUInt64("metric_log.collect_interval_milliseconds", DEFAULT_METRIC_LOG_COLLECT_INTERVAL_MILLISECONDS); - metric_log->startCollectMetric(collect_interval_milliseconds); + metric_log->startCollect(collect_interval_milliseconds); + } + + if (error_log) + { + size_t collect_interval_milliseconds = config.getUInt64("error_log.collect_interval_milliseconds", + DEFAULT_ERROR_LOG_COLLECT_INTERVAL_MILLISECONDS); + error_log->startCollect(collect_interval_milliseconds); } if (crash_log) @@ -504,6 +517,10 @@ void SystemLog::flushImpl(const std::vector & to_flush, Block block(std::move(log_element_columns)); MutableColumns columns = block.mutateColumns(); + + for (auto & column : columns) + column->reserve(to_flush.size()); + for (const auto & elem : to_flush) 
elem.appendToBlock(columns); @@ -519,8 +536,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, // we need query context to do inserts to target table with MV containing subqueries or joins auto insert_context = Context::createCopy(context); insert_context->makeQueryContext(); - /// We always want to deliver the data to the original table regardless of the MVs - insert_context->setSetting("materialized_views_ignore_errors", true); + addSettingsForQuery(insert_context, IAST::QueryKind::Insert); InterpreterInsertQuery interpreter(query_ptr, insert_context); BlockIO io = interpreter.execute(); @@ -533,7 +549,8 @@ void SystemLog::flushImpl(const std::vector & to_flush, } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("Failed to flush system log {} with {} entries up to offset {}", + table_id.getNameForLogs(), to_flush.size(), to_flush_end)); } queue->confirm(to_flush_end); @@ -541,13 +558,18 @@ void SystemLog::flushImpl(const std::vector & to_flush, LOG_TRACE(log, "Flushed system log up to offset {}", to_flush_end); } +template +StoragePtr SystemLog::getStorage() const +{ + return DatabaseCatalog::instance().tryGetTable(table_id, getContext()); +} template void SystemLog::prepareTable() { String description = table_id.getNameForLogs(); - auto table = DatabaseCatalog::instance().tryGetTable(table_id, getContext()); + auto table = getStorage(); if (table) { if (old_create_query.empty()) @@ -596,10 +618,9 @@ void SystemLog::prepareTable() merges_lock = table->getActionLock(ActionLocks::PartsMerge); auto query_context = Context::createCopy(context); - /// As this operation is performed automatically we don't want it to fail because of user dependencies on log tables - query_context->setSetting("check_table_dependencies", Field{false}); - query_context->setSetting("check_referential_table_dependencies", Field{false}); query_context->makeQueryContext(); + addSettingsForQuery(query_context, 
IAST::QueryKind::Rename); + InterpreterRenameQuery(rename, query_context).execute(); /// The required table will be created. @@ -616,6 +637,7 @@ void SystemLog::prepareTable() auto query_context = Context::createCopy(context); query_context->makeQueryContext(); + addSettingsForQuery(query_context, IAST::QueryKind::Create); auto create_query_ast = getCreateTableQuery(); InterpreterCreateQuery interpreter(create_query_ast, query_context); @@ -630,6 +652,22 @@ void SystemLog::prepareTable() is_prepared = true; } +template +void SystemLog::addSettingsForQuery(ContextMutablePtr & mutable_context, IAST::QueryKind query_kind) const +{ + if (query_kind == IAST::QueryKind::Insert) + { + /// We always want to deliver the data to the original table regardless of the MVs + mutable_context->setSetting("materialized_views_ignore_errors", true); + } + else if (query_kind == IAST::QueryKind::Rename) + { + /// As this operation is performed automatically we don't want it to fail because of user dependencies on log tables + mutable_context->setSetting("check_table_dependencies", Field{false}); + mutable_context->setSetting("check_referential_table_dependencies", Field{false}); + } +} + template ASTPtr SystemLog::getCreateTableQuery() { diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index e5b79585701..94cb8c3e2fd 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -2,6 +2,7 @@ #include #include +#include #include @@ -39,6 +40,7 @@ class PartLog; class TextLog; class TraceLog; class CrashLog; +class ErrorLog; class MetricLog; class AsynchronousMetricLog; class OpenTelemetrySpanLog; @@ -51,7 +53,7 @@ class FilesystemCacheLog; class FilesystemReadPrefetchesLog; class AsynchronousInsertLog; class BackupLog; -class S3QueueLog; +class ObjectStorageQueueLog; class BlobStorageLog; /// System logs should be destroyed in destructor of the last Context and before tables, @@ -71,9 +73,11 @@ struct SystemLogs std::shared_ptr crash_log; /// 
Used to log server crashes. std::shared_ptr text_log; /// Used to log all text messages. std::shared_ptr metric_log; /// Used to log all metrics. + std::shared_ptr error_log; /// Used to log errors. std::shared_ptr filesystem_cache_log; std::shared_ptr filesystem_read_prefetches_log; - std::shared_ptr s3_queue_log; + std::shared_ptr s3_queue_log; + std::shared_ptr azure_queue_log; /// Metrics from system.asynchronous_metrics. std::shared_ptr asynchronous_metric_log; /// OpenTelemetry trace spans. @@ -139,6 +143,17 @@ protected: using ISystemLog::thread_mutex; using Base::queue; + StoragePtr getStorage() const; + + /** Creates new table if it does not exist. + * Renames old table if its structure is not suitable. + * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. + */ + void prepareTable() override; + + /// Some tables can override settings for internal queries + virtual void addSettingsForQuery(ContextMutablePtr & mutable_context, IAST::QueryKind query_kind) const; + private: /* Saving thread data */ const StorageID table_id; @@ -147,12 +162,6 @@ private: String old_create_query; bool is_prepared = false; - /** Creates new table if it does not exist. - * Renames old table if its structure is not suitable. - * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. - */ - void prepareTable() override; - void savingThreadFunction() override; /// flushImpl can be executed only in saving_thread. 
diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index a74b5bba2b9..7f0fb8cd6ca 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include #include @@ -224,25 +226,37 @@ struct TemporaryFileStream::OutputWriter bool finalized = false; }; -TemporaryFileStream::Reader::Reader(const String & path, const Block & header_, size_t size) - : in_file_buf(path, size ? std::min(DBMS_DEFAULT_BUFFER_SIZE, size) : DBMS_DEFAULT_BUFFER_SIZE) - , in_compressed_buf(in_file_buf) - , in_reader(in_compressed_buf, header_, DBMS_TCP_PROTOCOL_VERSION) +TemporaryFileStream::Reader::Reader(const String & path_, const Block & header_, size_t size_) + : path(path_) + , size(size_ ? std::min(size_, DBMS_DEFAULT_BUFFER_SIZE) : DBMS_DEFAULT_BUFFER_SIZE) + , header(header_) { LOG_TEST(getLogger("TemporaryFileStream"), "Reading {} from {}", header_.dumpStructure(), path); } -TemporaryFileStream::Reader::Reader(const String & path, size_t size) - : in_file_buf(path, size ? std::min(DBMS_DEFAULT_BUFFER_SIZE, size) : DBMS_DEFAULT_BUFFER_SIZE) - , in_compressed_buf(in_file_buf) - , in_reader(in_compressed_buf, DBMS_TCP_PROTOCOL_VERSION) +TemporaryFileStream::Reader::Reader(const String & path_, size_t size_) + : path(path_) + , size(size_ ? 
std::min(size_, DBMS_DEFAULT_BUFFER_SIZE) : DBMS_DEFAULT_BUFFER_SIZE) { LOG_TEST(getLogger("TemporaryFileStream"), "Reading from {}", path); } Block TemporaryFileStream::Reader::read() { - return in_reader.read(); + if (!in_reader) + { + if (fs::exists(path)) + in_file_buf = std::make_unique(path, size); + else + in_file_buf = std::make_unique(); + + in_compressed_buf = std::make_unique(*in_file_buf); + if (header.has_value()) + in_reader = std::make_unique(*in_compressed_buf, header.value(), DBMS_TCP_PROTOCOL_VERSION); + else + in_reader = std::make_unique(*in_compressed_buf, DBMS_TCP_PROTOCOL_VERSION); + } + return in_reader->read(); } TemporaryFileStream::TemporaryFileStream(TemporaryFileOnDiskHolder file_, const Block & header_, TemporaryDataOnDisk * parent_) diff --git a/src/Interpreters/TemporaryDataOnDisk.h b/src/Interpreters/TemporaryDataOnDisk.h index 488eed70da9..d541c93e031 100644 --- a/src/Interpreters/TemporaryDataOnDisk.h +++ b/src/Interpreters/TemporaryDataOnDisk.h @@ -151,9 +151,13 @@ public: Block read(); - ReadBufferFromFile in_file_buf; - CompressedReadBuffer in_compressed_buf; - NativeReader in_reader; + const std::string path; + const size_t size; + const std::optional header; + + std::unique_ptr in_file_buf; + std::unique_ptr in_compressed_buf; + std::unique_ptr in_reader; }; struct Stat diff --git a/src/Interpreters/TraceCollector.cpp b/src/Interpreters/TraceCollector.cpp index 8e9c397b7a1..77f70d754c8 100644 --- a/src/Interpreters/TraceCollector.cpp +++ b/src/Interpreters/TraceCollector.cpp @@ -1,5 +1,4 @@ -#include "TraceCollector.h" - +#include #include #include #include @@ -14,8 +13,12 @@ namespace DB { -TraceCollector::TraceCollector(std::shared_ptr trace_log_) - : trace_log(std::move(trace_log_)) +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +TraceCollector::TraceCollector() { TraceSender::pipe.open(); @@ -28,6 +31,23 @@ TraceCollector::TraceCollector(std::shared_ptr trace_log_) thread = 
ThreadFromGlobalPool(&TraceCollector::run, this); } +void TraceCollector::initialize(std::shared_ptr trace_log_) +{ + if (is_trace_log_initialized) + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "TraceCollector is already initialized"); + + trace_log_ptr = trace_log_; + is_trace_log_initialized.store(true, std::memory_order_release); +} + +std::shared_ptr TraceCollector::getTraceLog() +{ + if (!is_trace_log_initialized.load(std::memory_order_acquire)) + return nullptr; + + return trace_log_ptr; +} + void TraceCollector::tryClosePipe() { try @@ -120,7 +140,7 @@ void TraceCollector::run() ProfileEvents::Count increment; readPODBinary(increment, in); - if (trace_log) + if (auto trace_log = getTraceLog()) { // time and time_in_microseconds are both being constructed from the same timespec so that the // times will be equal up to the precision of a second. diff --git a/src/Interpreters/TraceCollector.h b/src/Interpreters/TraceCollector.h index 382e7511ac6..c2894394dd0 100644 --- a/src/Interpreters/TraceCollector.h +++ b/src/Interpreters/TraceCollector.h @@ -1,4 +1,5 @@ #pragma once +#include #include class StackTrace; @@ -16,11 +17,17 @@ class TraceLog; class TraceCollector { public: - explicit TraceCollector(std::shared_ptr trace_log_); + TraceCollector(); ~TraceCollector(); + void initialize(std::shared_ptr trace_log_); + private: - std::shared_ptr trace_log; + std::shared_ptr getTraceLog(); + + std::atomic is_trace_log_initialized = false; + std::shared_ptr trace_log_ptr; + ThreadFromGlobalPool thread; void tryClosePipe(); diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 9363e3d83eb..184c263dbdb 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -615,9 +615,9 @@ static bool decimalEqualsFloat(Field field, Float64 float_value) return decimal_to_float == float_value; } -std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type) 
+std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & from_type, const IDataType & to_type) { - Field result_value = convertFieldToType(from_value, to_type); + Field result_value = convertFieldToType(from_value, to_type, &from_type); if (Field::isDecimal(from_value.getType()) && Field::isDecimal(result_value.getType())) { diff --git a/src/Interpreters/convertFieldToType.h b/src/Interpreters/convertFieldToType.h index 7f49ea5479d..4aa09f8619e 100644 --- a/src/Interpreters/convertFieldToType.h +++ b/src/Interpreters/convertFieldToType.h @@ -22,6 +22,6 @@ Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_t /// Applies stricter rules than convertFieldToType, doesn't allow loss of precision converting to Decimal. /// Returns `Field` if the conversion was successful and the result is equal to the original value, otherwise returns nullopt. -std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type); +std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & from_type, const IDataType & to_type); } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 9c5436517ab..9f33cbf1c27 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -44,6 +44,7 @@ #include #include +#include #include #include #include @@ -221,6 +222,17 @@ static void logException(ContextPtr context, QueryLogElement & elem, bool log_er LOG_INFO(getLogger("executeQuery"), message); } +static void +addPrivilegesInfoToQueryLogElement(QueryLogElement & element, const ContextPtr context_ptr) +{ + const auto & privileges_info = context_ptr->getQueryPrivilegesInfo(); + { + std::lock_guard lock(privileges_info.mutex); + element.used_privileges = privileges_info.used_privileges; + element.missing_privileges = privileges_info.missing_privileges; + } +} + static void addStatusInfoToQueryLogElement(QueryLogElement & element, const 
QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) { @@ -286,6 +298,7 @@ addStatusInfoToQueryLogElement(QueryLogElement & element, const QueryStatusInfo } element.async_read_counters = context_ptr->getAsyncReadCounters(); + addPrivilegesInfoToQueryLogElement(element, context_ptr); } @@ -601,6 +614,8 @@ void logExceptionBeforeStart( elem.formatted_query = queryToString(ast); } + addPrivilegesInfoToQueryLogElement(elem, context); + // We don't calculate databases, tables and columns when the query isn't able to start elem.exception_code = getCurrentExceptionCode(); diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp index d78b6ab0c4d..43be7c5f043 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp @@ -7,7 +7,6 @@ #include -#include #include @@ -18,18 +17,19 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; + extern const int INVALID_SETTING_VALUE; } ASTPtr getCustomKeyFilterForParallelReplica( size_t replicas_count, size_t replica_num, ASTPtr custom_key_ast, - ParallelReplicasCustomKeyFilterType filter_type, + ParallelReplicasCustomKeyFilter filter, const ColumnsDescription & columns, const ContextPtr & context) { chassert(replicas_count > 1); - if (filter_type == ParallelReplicasCustomKeyFilterType::DEFAULT) + if (filter.filter_type == ParallelReplicasCustomKeyFilterType::DEFAULT) { // first we do modulo with replica count auto modulo_function = makeASTFunction("positiveModulo", custom_key_ast, std::make_shared(replicas_count)); @@ -40,35 +40,80 @@ ASTPtr getCustomKeyFilterForParallelReplica( return equals_function; } - assert(filter_type == ParallelReplicasCustomKeyFilterType::RANGE); + chassert(filter.filter_type == ParallelReplicasCustomKeyFilterType::RANGE); KeyDescription custom_key_description = 
KeyDescription::getKeyFromAST(custom_key_ast, columns, context); using RelativeSize = boost::rational; - RelativeSize size_of_universum = 0; + RelativeSize range_upper = RelativeSize(0); + RelativeSize range_lower = RelativeSize(filter.range_lower); DataTypePtr custom_key_column_type = custom_key_description.data_types[0]; - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); if (custom_key_description.data_types.size() == 1) { if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + { + range_upper = filter.range_upper > 0 ? RelativeSize(filter.range_upper) + RelativeSize(1) + : RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + if (range_upper > RelativeSize(std::numeric_limits::max()) + RelativeSize(1)) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Invalid custom key range upper bound: {}. Value must be smaller than custom key column type (UInt64) max value", + range_upper); + } else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + { + range_upper = filter.range_upper > 0 ? RelativeSize(filter.range_upper) + RelativeSize(1) + : RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + if (range_upper > RelativeSize(std::numeric_limits::max()) + RelativeSize(1)) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Invalid custom key range upper bound: {}. Value must be smaller than custom key column type (UInt32) max value", + range_upper); + } else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + { + range_upper = filter.range_upper > 0 ? 
RelativeSize(filter.range_upper) + RelativeSize(1) + : RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + if (range_upper > RelativeSize(std::numeric_limits::max()) + RelativeSize(1)) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Invalid custom key range upper bound: {}. Value must be smaller than custom key column type (UInt16) max value", + range_upper); + } else if (typeid_cast(custom_key_column_type.get())) - size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + { + range_upper = filter.range_upper > 0 ? RelativeSize(filter.range_upper) + RelativeSize(1) + : RelativeSize(std::numeric_limits::max()) + RelativeSize(1); + if (range_upper > RelativeSize(std::numeric_limits::max()) + RelativeSize(1)) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Invalid custom key range upper bound: {}. Value must be smaller than custom key column type (UInt8) max value", + range_upper); + } } - if (size_of_universum == RelativeSize(0)) + if (range_upper == RelativeSize(0)) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid custom key column type: {}. 
Must be one unsigned integer type", custom_key_column_type->getName()); + if (range_lower >= range_upper) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Invalid custom key filter range: Range upper bound {} must be larger than range lower bound {}", + range_lower, + range_upper); + + RelativeSize size_of_universum = range_upper - range_lower; + + if (size_of_universum <= RelativeSize(replicas_count)) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, "Invalid custom key filter range: Range must be larger than than the number of replicas"); + RelativeSize relative_range_size = RelativeSize(1) / replicas_count; RelativeSize relative_range_offset = relative_range_size * RelativeSize(replica_num); @@ -76,19 +121,19 @@ ASTPtr getCustomKeyFilterForParallelReplica( bool has_lower_limit = false; bool has_upper_limit = false; - RelativeSize lower_limit_rational = relative_range_offset * size_of_universum; - RelativeSize upper_limit_rational = (relative_range_offset + relative_range_size) * size_of_universum; + RelativeSize lower_limit_rational = range_lower + relative_range_offset * size_of_universum; + RelativeSize upper_limit_rational = range_lower + (relative_range_offset + relative_range_size) * size_of_universum; UInt64 lower = boost::rational_cast(lower_limit_rational); UInt64 upper = boost::rational_cast(upper_limit_rational); - if (lower > 0) + if (lower_limit_rational > range_lower) has_lower_limit = true; - if (upper_limit_rational < size_of_universum) + if (upper_limit_rational < range_upper) has_upper_limit = true; - assert(has_lower_limit || has_upper_limit); + chassert(has_lower_limit || has_upper_limit); /// Let's add the conditions to cut off something else when the index is scanned again and when the request is processed. 
std::shared_ptr lower_function; @@ -110,7 +155,7 @@ ASTPtr getCustomKeyFilterForParallelReplica( return upper_function; } - assert(upper_function && lower_function); + chassert(upper_function && lower_function); return makeASTFunction("and", std::move(lower_function), std::move(upper_function)); } diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h index 1506c1992c0..dfee5123ecb 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h @@ -6,16 +6,24 @@ #include #include #include +#include namespace DB { +struct ParallelReplicasCustomKeyFilter +{ + ParallelReplicasCustomKeyFilterType filter_type; + UInt64 range_lower; + UInt64 range_upper; +}; + /// Get AST for filter created from custom_key /// replica_num is the number of the replica for which we are generating filter starting from 0 ASTPtr getCustomKeyFilterForParallelReplica( size_t replicas_count, size_t replica_num, ASTPtr custom_key_ast, - ParallelReplicasCustomKeyFilterType filter_type, + ParallelReplicasCustomKeyFilter filter, const ColumnsDescription & columns, const ContextPtr & context); diff --git a/src/Interpreters/joinDispatch.h b/src/Interpreters/joinDispatch.h index dccbe68fdb6..54c5c7dc83a 100644 --- a/src/Interpreters/joinDispatch.h +++ b/src/Interpreters/joinDispatch.h @@ -3,7 +3,7 @@ #include #include -#include +#include /** Used in implementation of Join to process different data structures. 
diff --git a/src/Interpreters/tests/gtest_actions_visitor.cpp b/src/Interpreters/tests/gtest_actions_visitor.cpp index 3de39ae6bfa..28e83306c53 100644 --- a/src/Interpreters/tests/gtest_actions_visitor.cpp +++ b/src/Interpreters/tests/gtest_actions_visitor.cpp @@ -31,7 +31,7 @@ TEST(ActionsVisitor, VisitLiteral) size_limits_for_set, size_t(0), name_and_types, - std::make_shared(name_and_types), + ActionsDAG(name_and_types), std::make_shared(), false /* no_subqueries */, false /* no_makeset */, @@ -39,7 +39,7 @@ TEST(ActionsVisitor, VisitLiteral) info); ActionsVisitor(visitor_data).visit(ast); auto actions = visitor_data.getActions(); - ASSERT_EQ(actions->getResultColumns().back().type->getTypeId(), expect_type->getTypeId()); + ASSERT_EQ(actions.getResultColumns().back().type->getTypeId(), expect_type->getTypeId()); } TEST(ActionsVisitor, VisitLiteralWithType) @@ -61,7 +61,7 @@ TEST(ActionsVisitor, VisitLiteralWithType) size_limits_for_set, size_t(0), name_and_types, - std::make_shared(name_and_types), + ActionsDAG(name_and_types), std::make_shared(), false /* no_subqueries */, false /* no_makeset */, @@ -69,5 +69,5 @@ TEST(ActionsVisitor, VisitLiteralWithType) info); ActionsVisitor(visitor_data).visit(ast); auto actions = visitor_data.getActions(); - ASSERT_EQ(actions->getResultColumns().back().type->getTypeId(), date_type->getTypeId()); + ASSERT_EQ(actions.getResultColumns().back().type->getTypeId(), date_type->getTypeId()); } diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp index 41191ba1605..36acc319f4e 100644 --- a/src/Interpreters/tests/gtest_filecache.cpp +++ b/src/Interpreters/tests/gtest_filecache.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -333,6 +334,7 @@ public: TEST_F(FileCacheTest, LRUPolicy) { + ServerUUID::setRandomForUnitTests(); DB::ThreadStatus thread_status; /// To work with cache need query_id and query context. 
@@ -807,6 +809,7 @@ TEST_F(FileCacheTest, LRUPolicy) TEST_F(FileCacheTest, writeBuffer) { + ServerUUID::setRandomForUnitTests(); FileCacheSettings settings; settings.max_size = 100; settings.max_elements = 5; @@ -938,6 +941,7 @@ static size_t readAllTemporaryData(TemporaryFileStream & stream) TEST_F(FileCacheTest, temporaryData) { + ServerUUID::setRandomForUnitTests(); DB::FileCacheSettings settings; settings.max_size = 10_KiB; settings.max_file_segment_size = 1_KiB; @@ -1044,6 +1048,7 @@ TEST_F(FileCacheTest, temporaryData) TEST_F(FileCacheTest, CachedReadBuffer) { + ServerUUID::setRandomForUnitTests(); DB::ThreadStatus thread_status; /// To work with cache need query_id and query context. @@ -1120,6 +1125,7 @@ TEST_F(FileCacheTest, CachedReadBuffer) TEST_F(FileCacheTest, TemporaryDataReadBufferSize) { + ServerUUID::setRandomForUnitTests(); /// Temporary data stored in cache { DB::FileCacheSettings settings; @@ -1167,6 +1173,7 @@ TEST_F(FileCacheTest, TemporaryDataReadBufferSize) TEST_F(FileCacheTest, SLRUPolicy) { + ServerUUID::setRandomForUnitTests(); DB::ThreadStatus thread_status; std::string query_id = "query_id"; /// To work with cache need query_id and query context. 
diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 0bd4b94d999..35b96bce42a 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -321,7 +321,12 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log bool should_log_to_console = isatty(STDIN_FILENO) || isatty(STDERR_FILENO); if (config.getBool("logger.console", false) || (!config.hasProperty("logger.console") && !is_daemon && should_log_to_console)) - split->setLevel("console", log_level); + { + auto console_log_level_string = config.getString("logger.console_log_level", log_level_string); + auto console_log_level = Poco::Logger::parseLevel(console_log_level_string); + max_log_level = std::max(console_log_level, max_log_level); + split->setLevel("console", console_log_level); + } else split->setLevel("console", 0); diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index f104e715452..58eeb7c4cbf 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -42,8 +42,8 @@ ASTPtr ASTAlterCommand::clone() const res->projection_decl = res->children.emplace_back(projection_decl->clone()).get(); if (projection) res->projection = res->children.emplace_back(projection->clone()).get(); - if (statistic_decl) - res->statistic_decl = res->children.emplace_back(statistic_decl->clone()).get(); + if (statistics_decl) + res->statistics_decl = res->children.emplace_back(statistics_decl->clone()).get(); if (partition) res->partition = res->children.emplace_back(partition->clone()).get(); if (predicate) @@ -60,6 +60,8 @@ ASTPtr ASTAlterCommand::clone() const res->settings_resets = res->children.emplace_back(settings_resets->clone()).get(); if (select) res->select = res->children.emplace_back(select->clone()).get(); + if (sql_security) + res->sql_security = res->children.emplace_back(sql_security->clone()).get(); if (rename_to) res->rename_to = res->children.emplace_back(rename_to->clone()).get(); @@ -200,27 +202,33 @@ void 
ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & partition->formatImpl(settings, state, frame); } } - else if (type == ASTAlterCommand::ADD_STATISTIC) + else if (type == ASTAlterCommand::ADD_STATISTICS) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "ADD STATISTIC " << (if_not_exists ? "IF NOT EXISTS " : "") + settings.ostr << (settings.hilite ? hilite_keyword : "") << "ADD STATISTICS " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); - statistic_decl->formatImpl(settings, state, frame); + statistics_decl->formatImpl(settings, state, frame); } - else if (type == ASTAlterCommand::DROP_STATISTIC) + else if (type == ASTAlterCommand::MODIFY_STATISTICS) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << (clear_statistic ? "CLEAR " : "DROP ") << "STATISTIC " + settings.ostr << (settings.hilite ? hilite_keyword : "") << "MODIFY STATISTICS " + << (settings.hilite ? hilite_none : ""); + statistics_decl->formatImpl(settings, state, frame); + } + else if (type == ASTAlterCommand::DROP_STATISTICS) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << (clear_statistics ? "CLEAR " : "DROP ") << "STATISTICS " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); - statistic_decl->formatImpl(settings, state, frame); + statistics_decl->formatImpl(settings, state, frame); if (partition) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : ""); partition->formatImpl(settings, state, frame); } } - else if (type == ASTAlterCommand::MATERIALIZE_STATISTIC) + else if (type == ASTAlterCommand::MATERIALIZE_STATISTICS) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "MATERIALIZE STATISTIC " << (settings.hilite ? hilite_none : ""); - statistic_decl->formatImpl(settings, state, frame); + settings.ostr << (settings.hilite ? hilite_keyword : "") << "MATERIALIZE STATISTICS " << (settings.hilite ? 
hilite_none : ""); + statistics_decl->formatImpl(settings, state, frame); if (partition) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN PARTITION " << (settings.hilite ? hilite_none : ""); @@ -507,7 +515,7 @@ void ASTAlterCommand::forEachPointerToChild(std::function f) f(reinterpret_cast(&constraint)); f(reinterpret_cast(&projection_decl)); f(reinterpret_cast(&projection)); - f(reinterpret_cast(&statistic_decl)); + f(reinterpret_cast(&statistics_decl)); f(reinterpret_cast(&partition)); f(reinterpret_cast(&predicate)); f(reinterpret_cast(&update_assignments)); @@ -516,6 +524,7 @@ void ASTAlterCommand::forEachPointerToChild(std::function f) f(reinterpret_cast(&settings_changes)); f(reinterpret_cast(&settings_resets)); f(reinterpret_cast(&select)); + f(reinterpret_cast(&sql_security)); f(reinterpret_cast(&rename_to)); } diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index a3cab1688c2..d7269bed2da 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -55,9 +55,10 @@ public: DROP_PROJECTION, MATERIALIZE_PROJECTION, - ADD_STATISTIC, - DROP_STATISTIC, - MATERIALIZE_STATISTIC, + ADD_STATISTICS, + DROP_STATISTICS, + MODIFY_STATISTICS, + MATERIALIZE_STATISTICS, DROP_PARTITION, DROP_DETACHED_PARTITION, @@ -135,7 +136,7 @@ public: */ IAST * projection = nullptr; - IAST * statistic_decl = nullptr; + IAST * statistics_decl = nullptr; /** Used in DROP PARTITION, ATTACH PARTITION FROM, FORGET PARTITION, UPDATE, DELETE queries. * The value or ID of the partition is stored here. 
@@ -180,7 +181,7 @@ public: bool clear_index = false; /// for CLEAR INDEX (do not drop index from metadata) - bool clear_statistic = false; /// for CLEAR STATISTIC (do not drop statistic from metadata) + bool clear_statistics = false; /// for CLEAR STATISTICS (do not drop statistics from metadata) bool clear_projection = false; /// for CLEAR PROJECTION (do not drop projection from metadata) diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index 6c29e0bf9d5..4a8a3d2967d 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -39,10 +39,10 @@ ASTPtr ASTColumnDeclaration::clone() const res->children.push_back(res->codec); } - if (stat_type) + if (statistics_desc) { - res->stat_type = stat_type->clone(); - res->children.push_back(res->stat_type); + res->statistics_desc = statistics_desc->clone(); + res->children.push_back(res->statistics_desc); } if (ttl) @@ -111,10 +111,10 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & format_settings, Fo codec->formatImpl(format_settings, state, frame); } - if (stat_type) + if (statistics_desc) { format_settings.ostr << ' '; - stat_type->formatImpl(format_settings, state, frame); + statistics_desc->formatImpl(format_settings, state, frame); } if (ttl) diff --git a/src/Parsers/ASTColumnDeclaration.h b/src/Parsers/ASTColumnDeclaration.h index d775928d05c..914916d5074 100644 --- a/src/Parsers/ASTColumnDeclaration.h +++ b/src/Parsers/ASTColumnDeclaration.h @@ -19,7 +19,7 @@ public: bool ephemeral_default = false; ASTPtr comment; ASTPtr codec; - ASTPtr stat_type; + ASTPtr statistics_desc; ASTPtr ttl; ASTPtr collation; ASTPtr settings; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 3e5c6a9d86e..d56a2724914 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -404,8 +404,18 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat << 
quoteString(toString(to_inner_uuid)); } + bool should_add_empty = is_create_empty; + auto add_empty_if_needed = [&] + { + if (!should_add_empty) + return; + should_add_empty = false; + settings.ostr << (settings.hilite ? hilite_keyword : "") << " EMPTY" << (settings.hilite ? hilite_none : ""); + }; + if (!as_table.empty()) { + add_empty_if_needed(); settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : "") << (!as_database.empty() ? backQuoteIfNeed(as_database) + "." : "") << backQuoteIfNeed(as_table); @@ -423,6 +433,7 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat frame.expression_list_always_start_on_new_line = false; } + add_empty_if_needed(); settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : ""); as_table_function->formatImpl(settings, state, frame); } @@ -484,8 +495,8 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat if (is_populate) settings.ostr << (settings.hilite ? hilite_keyword : "") << " POPULATE" << (settings.hilite ? hilite_none : ""); - else if (is_create_empty) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " EMPTY" << (settings.hilite ? 
hilite_none : ""); + + add_empty_if_needed(); if (sql_security && supportSQLSecurity() && sql_security->as().type.has_value()) { diff --git a/src/Parsers/ASTSQLSecurity.cpp b/src/Parsers/ASTSQLSecurity.cpp index d6f1c21d035..74408747290 100644 --- a/src/Parsers/ASTSQLSecurity.cpp +++ b/src/Parsers/ASTSQLSecurity.cpp @@ -7,7 +7,7 @@ namespace DB void ASTSQLSecurity::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - if (!type.has_value()) + if (!type) return; if (definer || is_definer_current_user) diff --git a/src/Parsers/ASTStatisticDeclaration.cpp b/src/Parsers/ASTStatisticDeclaration.cpp deleted file mode 100644 index 0e20b020ab3..00000000000 --- a/src/Parsers/ASTStatisticDeclaration.cpp +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include - -#include -#include -#include - - -namespace DB -{ - -ASTPtr ASTStatisticDeclaration::clone() const -{ - auto res = std::make_shared(); - - res->set(res->columns, columns->clone()); - res->type = type; - - return res; -} - -std::vector ASTStatisticDeclaration::getColumnNames() const -{ - std::vector result; - result.reserve(columns->children.size()); - for (const ASTPtr & column_ast : columns->children) - { - result.push_back(column_ast->as().name()); - } - return result; - -} - -void ASTStatisticDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const -{ - columns->formatImpl(s, state, frame); - s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? 
hilite_none : ""); - s.ostr << backQuoteIfNeed(type); -} - -} - diff --git a/src/Parsers/ASTStatisticsDeclaration.cpp b/src/Parsers/ASTStatisticsDeclaration.cpp new file mode 100644 index 00000000000..f9b7a9e29db --- /dev/null +++ b/src/Parsers/ASTStatisticsDeclaration.cpp @@ -0,0 +1,60 @@ +#include +#include + +#include +#include +#include + + +namespace DB +{ + +ASTPtr ASTStatisticsDeclaration::clone() const +{ + auto res = std::make_shared(); + + res->set(res->columns, columns->clone()); + if (types) + res->set(res->types, types->clone()); + + return res; +} + +std::vector ASTStatisticsDeclaration::getColumnNames() const +{ + std::vector result; + result.reserve(columns->children.size()); + for (const ASTPtr & column_ast : columns->children) + { + result.push_back(column_ast->as().name()); + } + return result; + +} + +std::vector ASTStatisticsDeclaration::getTypeNames() const +{ + chassert(types != nullptr); + std::vector result; + result.reserve(types->children.size()); + for (const ASTPtr & column_ast : types->children) + { + result.push_back(column_ast->as().name); + } + return result; + +} + +void ASTStatisticsDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const +{ + columns->formatImpl(s, state, frame); + s.ostr << (s.hilite ? hilite_keyword : ""); + if (types) + { + s.ostr << " TYPE " << (s.hilite ? 
hilite_none : ""); + types->formatImpl(s, state, frame); + } +} + +} + diff --git a/src/Parsers/ASTStatisticDeclaration.h b/src/Parsers/ASTStatisticsDeclaration.h similarity index 74% rename from src/Parsers/ASTStatisticDeclaration.h rename to src/Parsers/ASTStatisticsDeclaration.h index f936c93f2ba..f43567b3c70 100644 --- a/src/Parsers/ASTStatisticDeclaration.h +++ b/src/Parsers/ASTStatisticsDeclaration.h @@ -9,17 +9,17 @@ class ASTFunction; /** name BY columns TYPE typename(args) in create query */ -class ASTStatisticDeclaration : public IAST +class ASTStatisticsDeclaration : public IAST { public: IAST * columns; - /// TODO type should be a list of ASTFunction, for example, 'tdigest(256), hyperloglog(128)', etc. - String type; + IAST * types; /** Get the text that identifies this element. */ String getID(char) const override { return "Stat"; } std::vector getColumnNames() const; + std::vector getTypeNames() const; ASTPtr clone() const override; void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/Access/ASTAuthenticationData.cpp b/src/Parsers/Access/ASTAuthenticationData.cpp index 3a62480dc0c..386ed900960 100644 --- a/src/Parsers/Access/ASTAuthenticationData.cpp +++ b/src/Parsers/Access/ASTAuthenticationData.cpp @@ -89,6 +89,12 @@ void ASTAuthenticationData::formatImpl(const FormatSettings & settings, FormatSt password = true; break; } + case AuthenticationType::JWT: + { + prefix = "CLAIMS"; + parameter = true; + break; + } case AuthenticationType::LDAP: { prefix = "SERVER"; @@ -106,7 +112,7 @@ void ASTAuthenticationData::formatImpl(const FormatSettings & settings, FormatSt } case AuthenticationType::SSL_CERTIFICATE: { - prefix = "CN"; + prefix = ssl_cert_subject_type.value(); parameters = true; break; } diff --git a/src/Parsers/Access/ASTAuthenticationData.h b/src/Parsers/Access/ASTAuthenticationData.h index de166bdf234..7f0644b3437 100644 --- 
a/src/Parsers/Access/ASTAuthenticationData.h +++ b/src/Parsers/Access/ASTAuthenticationData.h @@ -33,6 +33,7 @@ public: std::optional getPassword() const; std::optional getSalt() const; + std::optional ssl_cert_subject_type; /// CN or SubjectAltName /// If type is empty we use the default password type. /// AuthenticationType::NO_PASSWORD is specified explicitly. diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 02735568a04..6f0ccc76797 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index d4729ab796a..d4a8813e9e4 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -20,7 +20,6 @@ #include #include #include - #include "config.h" namespace DB @@ -65,7 +64,7 @@ namespace bool expect_hash = false; bool expect_ldap_server_name = false; bool expect_kerberos_realm = false; - bool expect_common_names = false; + bool expect_ssl_cert_subjects = false; bool expect_public_ssh_key = false; bool expect_http_auth_server = false; @@ -82,7 +81,7 @@ namespace else if (check_type == AuthenticationType::KERBEROS) expect_kerberos_realm = true; else if (check_type == AuthenticationType::SSL_CERTIFICATE) - expect_common_names = true; + expect_ssl_cert_subjects = true; else if (check_type == AuthenticationType::SSH_KEY) expect_public_ssh_key = true; else if (check_type == AuthenticationType::HTTP) @@ -122,9 +121,10 @@ namespace ASTPtr value; ASTPtr parsed_salt; - ASTPtr common_names; ASTPtr public_ssh_keys; ASTPtr http_auth_scheme; + ASTPtr ssl_cert_subjects; + std::optional ssl_cert_subject_type; if (expect_password || expect_hash) { @@ -153,12 +153,19 @@ namespace return false; } } - else if (expect_common_names) + else if (expect_ssl_cert_subjects) { - if 
(!ParserKeyword{Keyword::CN}.ignore(pos, expected)) + for (const Keyword &keyword : {Keyword::CN, Keyword::SAN}) + if (ParserKeyword{keyword}.ignore(pos, expected)) + { + ssl_cert_subject_type = toStringView(keyword); + break; + } + + if (!ssl_cert_subject_type) return false; - if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, common_names, expected)) + if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, ssl_cert_subjects, expected)) return false; } else if (expect_public_ssh_key) @@ -166,7 +173,7 @@ namespace if (!ParserKeyword{Keyword::BY}.ignore(pos, expected)) return false; - if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, common_names, expected)) + if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, public_ssh_keys, expected)) return false; } else if (expect_http_auth_server) @@ -195,8 +202,11 @@ namespace if (parsed_salt) auth_data->children.push_back(std::move(parsed_salt)); - if (common_names) - auth_data->children = std::move(common_names->children); + if (ssl_cert_subjects) + { + auth_data->ssl_cert_subject_type = ssl_cert_subject_type.value(); + auth_data->children = std::move(ssl_cert_subjects->children); + } if (public_ssh_keys) auth_data->children = std::move(public_ssh_keys->children); diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index 97094b00bc6..0ae9ee4833c 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -13,7 +13,7 @@ namespace DB MR_MACROS(ADD_CONSTRAINT, "ADD CONSTRAINT") \ MR_MACROS(ADD_INDEX, "ADD INDEX") \ MR_MACROS(ADD_PROJECTION, "ADD PROJECTION") \ - MR_MACROS(ADD_STATISTIC, "ADD STATISTIC") \ + MR_MACROS(ADD_STATISTICS, "ADD STATISTICS") \ MR_MACROS(ADD, "ADD") \ MR_MACROS(ADMIN_OPTION_FOR, "ADMIN OPTION FOR") \ MR_MACROS(AFTER, "AFTER") \ @@ -83,7 +83,7 @@ namespace DB MR_MACROS(CLEAR_COLUMN, "CLEAR COLUMN") \ 
MR_MACROS(CLEAR_INDEX, "CLEAR INDEX") \ MR_MACROS(CLEAR_PROJECTION, "CLEAR PROJECTION") \ - MR_MACROS(CLEAR_STATISTIC, "CLEAR STATISTIC") \ + MR_MACROS(CLEAR_STATISTICS, "CLEAR STATISTICS") \ MR_MACROS(CLUSTER, "CLUSTER") \ MR_MACROS(CLUSTERS, "CLUSTERS") \ MR_MACROS(CN, "CN") \ @@ -150,7 +150,7 @@ namespace DB MR_MACROS(DROP_PART, "DROP PART") \ MR_MACROS(DROP_PARTITION, "DROP PARTITION") \ MR_MACROS(DROP_PROJECTION, "DROP PROJECTION") \ - MR_MACROS(DROP_STATISTIC, "DROP STATISTIC") \ + MR_MACROS(DROP_STATISTICS, "DROP STATISTICS") \ MR_MACROS(DROP_TABLE, "DROP TABLE") \ MR_MACROS(DROP_TEMPORARY_TABLE, "DROP TEMPORARY TABLE") \ MR_MACROS(DROP, "DROP") \ @@ -250,6 +250,7 @@ namespace DB MR_MACROS(IS_NOT_NULL, "IS NOT NULL") \ MR_MACROS(IS_NULL, "IS NULL") \ MR_MACROS(JOIN, "JOIN") \ + MR_MACROS(JWT, "JWT") \ MR_MACROS(KERBEROS, "KERBEROS") \ MR_MACROS(KEY_BY, "KEY BY") \ MR_MACROS(KEY, "KEY") \ @@ -279,7 +280,7 @@ namespace DB MR_MACROS(MATERIALIZE_COLUMN, "MATERIALIZE COLUMN") \ MR_MACROS(MATERIALIZE_INDEX, "MATERIALIZE INDEX") \ MR_MACROS(MATERIALIZE_PROJECTION, "MATERIALIZE PROJECTION") \ - MR_MACROS(MATERIALIZE_STATISTIC, "MATERIALIZE STATISTIC") \ + MR_MACROS(MATERIALIZE_STATISTICS, "MATERIALIZE STATISTICS") \ MR_MACROS(MATERIALIZE_TTL, "MATERIALIZE TTL") \ MR_MACROS(MATERIALIZE, "MATERIALIZE") \ MR_MACROS(MATERIALIZED, "MATERIALIZED") \ @@ -304,6 +305,7 @@ namespace DB MR_MACROS(MODIFY_QUERY, "MODIFY QUERY") \ MR_MACROS(MODIFY_REFRESH, "MODIFY REFRESH") \ MR_MACROS(MODIFY_SAMPLE_BY, "MODIFY SAMPLE BY") \ + MR_MACROS(MODIFY_STATISTICS, "MODIFY STATISTICS") \ MR_MACROS(MODIFY_SETTING, "MODIFY SETTING") \ MR_MACROS(MODIFY_SQL_SECURITY, "MODIFY SQL SECURITY") \ MR_MACROS(MODIFY_TTL, "MODIFY TTL") \ @@ -416,6 +418,7 @@ namespace DB MR_MACROS(SALT, "SALT") \ MR_MACROS(SAMPLE_BY, "SAMPLE BY") \ MR_MACROS(SAMPLE, "SAMPLE") \ + MR_MACROS(SAN, "SAN") \ MR_MACROS(SCHEME, "SCHEME") \ MR_MACROS(SECOND, "SECOND") \ MR_MACROS(SECONDS, "SECONDS") \ @@ -447,7 +450,7 @@ 
namespace DB MR_MACROS(SQL_SECURITY, "SQL SECURITY") \ MR_MACROS(SS, "SS") \ MR_MACROS(START_TRANSACTION, "START TRANSACTION") \ - MR_MACROS(STATISTIC, "STATISTIC") \ + MR_MACROS(STATISTICS, "STATISTICS") \ MR_MACROS(STEP, "STEP") \ MR_MACROS(STORAGE, "STORAGE") \ MR_MACROS(STRICT, "STRICT") \ diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 416f696323c..5997452bcf3 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -703,7 +703,7 @@ bool ParserCodec::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } -bool ParserStatisticType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserStatisticsType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserList stat_type_parser(std::make_unique(), std::make_unique(TokenType::Comma), false); @@ -722,7 +722,7 @@ bool ParserStatisticType::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ++pos; auto function_node = std::make_shared(); - function_node->name = "STATISTIC"; + function_node->name = "STATISTICS"; function_node->arguments = stat_type; function_node->children.push_back(function_node->arguments); diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 14d501e50da..4e3f29bfe0c 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -202,11 +202,11 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -/// STATISTIC(tdigest(200)) -class ParserStatisticType : public IParserBase +/// STATISTICS(tdigest(200)) +class ParserStatisticsType : public IParserBase { protected: - const char * getName() const override { return "statistic"; } + const char * getName() const override { return "statistics"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Parsers/ExpressionListParsers.cpp 
b/src/Parsers/ExpressionListParsers.cpp index 7cdfaf988a3..fff8383e7b3 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2179,7 +2179,7 @@ public: bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override { - /// kql(table|project ...) + /// kql('table|project ...') /// 0. Parse the kql query /// 1. Parse closing token if (state == 0) diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp index 3692a4c73e5..eaf28bbbc41 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.cpp +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB @@ -20,8 +21,9 @@ namespace ErrorCodes class FunctionParameterValuesVisitor { public: - explicit FunctionParameterValuesVisitor(NameToNameMap & parameter_values_) + explicit FunctionParameterValuesVisitor(NameToNameMap & parameter_values_, ContextPtr context_) : parameter_values(parameter_values_) + , context(context_) { } @@ -35,6 +37,7 @@ public: private: NameToNameMap & parameter_values; + ContextPtr context; void visitFunction(const ASTFunction & parameter_function) { @@ -64,15 +67,20 @@ private: parameter_values[identifier->name()] = convertFieldToString(cast_literal->value); } } + else + { + ASTPtr res = evaluateConstantExpressionOrIdentifierAsLiteral(expression_list->children[1], context); + parameter_values[identifier->name()] = convertFieldToString(res->as()->value); + } } } } }; -NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast) +NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast, ContextPtr context) { NameToNameMap parameter_values; - FunctionParameterValuesVisitor(parameter_values).visit(ast); + FunctionParameterValuesVisitor(parameter_values, context).visit(ast); return parameter_values; } diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h index 
e6ce0e42d06..8c2686dcc65 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.h +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -2,12 +2,13 @@ #include #include +#include namespace DB { /// Find parameters in a query parameter values and collect them into map. -NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast); +NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast, ContextPtr context); } diff --git a/src/Parsers/FunctionSecretArgumentsFinderAST.h b/src/Parsers/FunctionSecretArgumentsFinderAST.h index 348b2ca9e3a..5b77485afb0 100644 --- a/src/Parsers/FunctionSecretArgumentsFinderAST.h +++ b/src/Parsers/FunctionSecretArgumentsFinderAST.h @@ -82,6 +82,16 @@ private: /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) findS3FunctionSecretArguments(/* is_cluster_function= */ true); } + else if (function.name == "azureBlobStorage") + { + /// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) + findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false); + } + else if (function.name == "azureBlobStorageCluster") + { + /// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) + findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true); + } else if ((function.name == "remote") || (function.name == "remoteSecure")) { /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) @@ -169,6 +179,43 @@ private: markSecretArgument(url_arg_idx + 2); } + void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function) + { + /// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument. + size_t url_arg_idx = is_cluster_function ? 
1 : 0; + + if (!is_cluster_function && isNamedCollectionName(0)) + { + /// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...) + findSecretNamedArgument("account_key", 1); + return; + } + else if (is_cluster_function && isNamedCollectionName(1)) + { + /// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...) + findSecretNamedArgument("account_key", 2); + return; + } + + /// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used + /// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) + /// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) + size_t count = arguments->size(); + if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7)) + { + String second_arg; + if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg)) + { + if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + return; /// The argument after 'url' is a format: s3('url', 'format', ...) 
+ } + } + + /// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature + if (url_arg_idx + 4 < count) + markSecretArgument(url_arg_idx + 4); + } + void findURLSecretArguments() { if (!isNamedCollectionName(0)) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index bea1a627129..440d0c73803 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -853,7 +853,7 @@ Please note that the functions listed below only take constant parameters for no ## KQL() function - create table - `CREATE TABLE kql_table4 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName,Age);` + `CREATE TABLE kql_table4 ENGINE = Memory AS select *, now() as new_column From kql($$Customers | project LastName,Age$$);` verify the content of `kql_table` `select * from kql_table` @@ -867,12 +867,12 @@ Please note that the functions listed below only take constant parameters for no Age Nullable(UInt8) ) ENGINE = Memory; ``` - `INSERT INTO temp select * from kql(Customers|project FirstName,LastName,Age);` + `INSERT INTO temp select * from kql($$Customers|project FirstName,LastName,Age$$);` verify the content of `temp` `select * from temp` - - Select from kql() - `Select * from kql(Customers|project FirstName)` + - Select from kql(...) 
+ `Select * from kql($$Customers|project FirstName$$)` ## KQL operators: - Tabular expression statements @@ -993,4 +993,3 @@ Please note that the functions listed below only take constant parameters for no - dcount() - dcountif() - bin - \ No newline at end of file diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 1d77007a37c..affce53fbc7 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -301,8 +301,8 @@ String IParserKQLFunction::kqlCallToExpression( }); const auto kql_call = std::format("{}({})", function_name, params_str); - DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); - DB::IParser::Pos tokens_pos(call_tokens, max_depth, max_backtracks); + Tokens call_tokens(kql_call.data(), kql_call.data() + kql_call.length(), 0, true); + IParser::Pos tokens_pos(call_tokens, max_depth, max_backtracks); return DB::IParserKQLFunction::getExpression(tokens_pos); } diff --git a/src/Parsers/Kusto/ParserKQLDistinct.cpp b/src/Parsers/Kusto/ParserKQLDistinct.cpp index 3ec823a61b5..0a4aac64720 100644 --- a/src/Parsers/Kusto/ParserKQLDistinct.cpp +++ b/src/Parsers/Kusto/ParserKQLDistinct.cpp @@ -11,7 +11,7 @@ bool ParserKQLDistinct::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) expr = getExprFromToken(pos); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected)) diff --git a/src/Parsers/Kusto/ParserKQLExtend.cpp b/src/Parsers/Kusto/ParserKQLExtend.cpp index 41ce296bd25..87a0d7b355a 100644 --- a/src/Parsers/Kusto/ParserKQLExtend.cpp +++ b/src/Parsers/Kusto/ParserKQLExtend.cpp @@ -22,7 +22,7 @@ bool ParserKQLExtend ::parseImpl(Pos & pos, ASTPtr & node, 
Expected & expected) String except_str; String new_extend_str; - Tokens ntokens(extend_expr.c_str(), extend_expr.c_str() + extend_expr.size()); + Tokens ntokens(extend_expr.data(), extend_expr.data() + extend_expr.size(), 0, true); IParser::Pos npos(ntokens, pos.max_depth, pos.max_backtracks); String alias; @@ -76,7 +76,7 @@ bool ParserKQLExtend ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) apply_alias(); String expr = std::format("SELECT * {}, {} from prev", except_str, new_extend_str); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserSelectQuery().parse(new_pos, select_query, expected)) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index b060ce8d2c7..5c7c22d5b22 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -13,7 +13,7 @@ bool ParserKQLFilter::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) String expr = getExprFromToken(pos); ASTPtr where_expression; - Tokens token_filter(expr.c_str(), expr.c_str() + expr.size()); + Tokens token_filter(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos pos_filter(token_filter, pos.max_depth, pos.max_backtracks); if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, where_expression, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 0eb460757b1..db6f4e7f46c 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -13,7 +13,7 @@ bool ParserKQLLimit::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto expr = getExprFromToken(pos); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if 
(!ParserExpressionWithOptionalAlias(false).parse(new_pos, limit_length, expected)) diff --git a/src/Parsers/Kusto/ParserKQLMVExpand.cpp b/src/Parsers/Kusto/ParserKQLMVExpand.cpp index 9beb1c39e34..835d50b42ac 100644 --- a/src/Parsers/Kusto/ParserKQLMVExpand.cpp +++ b/src/Parsers/Kusto/ParserKQLMVExpand.cpp @@ -298,7 +298,7 @@ bool ParserKQLMVExpand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; const String setting_str = "enable_unaligned_array_join = 1"; - Tokens token_settings(setting_str.c_str(), setting_str.c_str() + setting_str.size()); + Tokens token_settings(setting_str.data(), setting_str.data() + setting_str.size(), 0, true); IParser::Pos pos_settings(token_settings, pos.max_depth, pos.max_backtracks); if (!ParserSetQuery(true).parse(pos_settings, setting, expected)) diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp index f1ad9d9738b..d2ec059ddba 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -173,7 +173,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & auto date_type_cast = [&](String & src) { - Tokens tokens(src.c_str(), src.c_str() + src.size()); + Tokens tokens(src.data(), src.data() + src.size(), 0, true); IParser::Pos pos(tokens, max_depth, max_backtracks); String res; while (isValidKQLPos(pos)) @@ -200,7 +200,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & auto get_group_expression_alias = [&] { std::vector group_expression_tokens; - Tokens tokens(group_expression.c_str(), group_expression.c_str() + group_expression.size()); + Tokens tokens(group_expression.data(), group_expression.data() + group_expression.size(), 0, true); IParser::Pos pos(tokens, max_depth, max_backtracks); while (isValidKQLPos(pos)) { @@ -413,7 +413,7 @@ bool ParserKQLMakeSeries ::parseImpl(Pos & pos, ASTPtr & node, Expected & expect makeSeries(kql_make_series, node, 
pos.max_depth, pos.max_backtracks); - Tokens token_main_query(kql_make_series.main_query.c_str(), kql_make_series.main_query.c_str() + kql_make_series.main_query.size()); + Tokens token_main_query(kql_make_series.main_query.data(), kql_make_series.main_query.data() + kql_make_series.main_query.size(), 0, true); IParser::Pos pos_main_query(token_main_query, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(true).parse(pos_main_query, select_expression_list, expected)) diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index d7364cb5fd7..c31c8711008 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -1,20 +1,26 @@ #include #include #include -#include #include -#include #include #include #include #include #include -#include "KustoFunctions/IParserKQLFunction.h" + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} namespace { -enum class KQLOperatorValue : uint16_t +enum class KQLOperatorValue { none, between, @@ -56,7 +62,8 @@ enum class KQLOperatorValue : uint16_t not_startswith_cs, }; -const std::unordered_map KQLOperator = { +const std::unordered_map KQLOperator = +{ {"between", KQLOperatorValue::between}, {"!between", KQLOperatorValue::not_between}, {"contains", KQLOperatorValue::contains}, @@ -96,44 +103,37 @@ const std::unordered_map KQLOperator = { {"!startswith_cs", KQLOperatorValue::not_startswith_cs}, }; -void rebuildSubqueryForInOperator(DB::ASTPtr & node, bool useLowerCase) +void rebuildSubqueryForInOperator(ASTPtr & node, bool useLowerCase) { //A sub-query for in operator in kql can have multiple columns, but only takes the first column. //A sub-query for in operator in ClickHouse can not have multiple columns //So only take the first column if there are multiple columns. //select * not working for subquery. 
(a tabular statement without project) - const auto selectColumns = node->children[0]->children[0]->as()->select(); + const auto selectColumns = node->children[0]->children[0]->as()->select(); while (selectColumns->children.size() > 1) selectColumns->children.pop_back(); if (useLowerCase) { - auto args = std::make_shared(); + auto args = std::make_shared(); args->children.push_back(selectColumns->children[0]); - auto func_lower = std::make_shared(); + auto func_lower = std::make_shared(); func_lower->name = "lower"; func_lower->children.push_back(selectColumns->children[0]); func_lower->arguments = args; - if (selectColumns->children[0]->as()) - func_lower->alias = std::move(selectColumns->children[0]->as()->alias); - else if (selectColumns->children[0]->as()) - func_lower->alias = std::move(selectColumns->children[0]->as()->alias); + if (selectColumns->children[0]->as()) + func_lower->alias = std::move(selectColumns->children[0]->as()->alias); + else if (selectColumns->children[0]->as()) + func_lower->alias = std::move(selectColumns->children[0]->as()->alias); - auto funcs = std::make_shared(); + auto funcs = std::make_shared(); funcs->children.push_back(func_lower); selectColumns->children[0] = std::move(funcs); } } } -namespace DB -{ - -namespace ErrorCodes -{ - extern const int SYNTAX_ERROR; -} String KQLOperators::genHasAnyAllOpExpr(std::vector & tokens, IParser::Pos & token_pos, String kql_op, String ch_op) { @@ -166,7 +166,7 @@ String KQLOperators::genHasAnyAllOpExpr(std::vector & tokens, IParser::P return new_expr; } -String genEqOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos, const String & ch_op) +String genEqOpExprCis(std::vector & tokens, IParser::Pos & token_pos, const String & ch_op) { String tmp_arg(token_pos->begin, token_pos->end); @@ -178,30 +178,30 @@ String genEqOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos new_expr += ch_op + " "; ++token_pos; - if (token_pos->type == DB::TokenType::StringLiteral || token_pos->type 
== DB::TokenType::QuotedIdentifier) - new_expr += "lower('" + DB::IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + "')"; + if (token_pos->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier) + new_expr += "lower('" + IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + "')"; else - new_expr += "lower(" + DB::IParserKQLFunction::getExpression(token_pos) + ")"; + new_expr += "lower(" + IParserKQLFunction::getExpression(token_pos) + ")"; tokens.pop_back(); return new_expr; } -String genInOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos, const String & kql_op, const String & ch_op) +String genInOpExprCis(std::vector & tokens, IParser::Pos & token_pos, const String & kql_op, const String & ch_op) { - DB::ParserKQLTableFunction kqlfun_p; - DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); + ParserKQLTableFunction kqlfun_p; + ParserToken s_lparen(TokenType::OpeningRoundBracket); - DB::ASTPtr select; - DB::Expected expected; + ASTPtr select; + Expected expected; String new_expr; ++token_pos; if (!s_lparen.ignore(token_pos, expected)) - throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); if (tokens.empty()) - throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); new_expr = "lower(" + tokens.back() + ") "; tokens.pop_back(); @@ -218,39 +218,39 @@ String genInOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos --token_pos; new_expr += ch_op; - while (isValidKQLPos(token_pos) && token_pos->type != DB::TokenType::PipeMark && token_pos->type != DB::TokenType::Semicolon) + while (isValidKQLPos(token_pos) && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) { auto tmp_arg = 
String(token_pos->begin, token_pos->end); - if (token_pos->type != DB::TokenType::Comma && token_pos->type != DB::TokenType::ClosingRoundBracket - && token_pos->type != DB::TokenType::OpeningRoundBracket && token_pos->type != DB::TokenType::OpeningSquareBracket - && token_pos->type != DB::TokenType::ClosingSquareBracket && tmp_arg != "~" && tmp_arg != "dynamic") + if (token_pos->type != TokenType::Comma && token_pos->type != TokenType::ClosingRoundBracket + && token_pos->type != TokenType::OpeningRoundBracket && token_pos->type != TokenType::OpeningSquareBracket + && token_pos->type != TokenType::ClosingSquareBracket && tmp_arg != "~" && tmp_arg != "dynamic") { - if (token_pos->type == DB::TokenType::StringLiteral || token_pos->type == DB::TokenType::QuotedIdentifier) - new_expr += "lower('" + DB::IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + "')"; + if (token_pos->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier) + new_expr += "lower('" + IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + "')"; else new_expr += "lower(" + tmp_arg + ")"; } else if (tmp_arg != "~" && tmp_arg != "dynamic" && tmp_arg != "[" && tmp_arg != "]") new_expr += tmp_arg; - if (token_pos->type == DB::TokenType::ClosingRoundBracket) + if (token_pos->type == TokenType::ClosingRoundBracket) break; ++token_pos; } return new_expr; } -std::string genInOpExpr(DB::IParser::Pos & token_pos, const std::string & kql_op, const std::string & ch_op) +std::string genInOpExpr(IParser::Pos & token_pos, const std::string & kql_op, const std::string & ch_op) { - DB::ParserKQLTableFunction kqlfun_p; - DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); + ParserKQLTableFunction kqlfun_p; + ParserToken s_lparen(TokenType::OpeningRoundBracket); - DB::ASTPtr select; - DB::Expected expected; + ASTPtr select; + Expected expected; ++token_pos; if (!s_lparen.ignore(token_pos, expected)) - throw 
DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); auto pos = token_pos; if (kqlfun_p.parse(pos, select, expected)) diff --git a/src/Parsers/Kusto/ParserKQLPrint.cpp b/src/Parsers/Kusto/ParserKQLPrint.cpp index 37483439f14..dceeed841b6 100644 --- a/src/Parsers/Kusto/ParserKQLPrint.cpp +++ b/src/Parsers/Kusto/ParserKQLPrint.cpp @@ -9,7 +9,7 @@ bool ParserKQLPrint::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr select_expression_list; const String expr = getExprFromToken(pos); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index eab9ee082c5..8542c1be734 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -11,7 +11,7 @@ bool ParserKQLProject ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) expr = getExprFromToken(pos); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected)) diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 99b2d1da890..626512b6ea1 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -37,7 +37,7 @@ bool ParserKQLBase::parseByString(String expr, ASTPtr & node, uint32_t max_depth { Expected expected; - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos pos(tokens, 
max_depth, max_backtracks); return parse(pos, node, expected); } @@ -45,7 +45,7 @@ bool ParserKQLBase::parseByString(String expr, ASTPtr & node, uint32_t max_depth bool ParserKQLBase::parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks) { Expected expected; - Tokens token_subquery(query.c_str(), query.c_str() + query.size()); + Tokens token_subquery(query.data(), query.data() + query.size(), 0, true); IParser::Pos pos_subquery(token_subquery, max_depth, max_backtracks); if (!parser->parse(pos_subquery, select_node, expected)) return false; @@ -123,7 +123,7 @@ bool ParserKQLBase::setSubQuerySource(ASTPtr & select_query, ASTPtr & source, bo String ParserKQLBase::getExprFromToken(const String & text, uint32_t max_depth, uint32_t max_backtracks) { - Tokens tokens(text.c_str(), text.c_str() + text.size()); + Tokens tokens(text.data(), text.data() + text.size(), 0, true); IParser::Pos pos(tokens, max_depth, max_backtracks); return getExprFromToken(pos); @@ -522,7 +522,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) --last_pos; String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); - Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); + Tokens token_subquery(sub_query.data(), sub_query.data() + sub_query.size(), 0, true); IParser::Pos pos_subquery(token_subquery, pos.max_depth, pos.max_backtracks); if (!ParserKQLSubquery().parse(pos_subquery, tables, expected)) @@ -543,7 +543,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto oprator = getOperator(op_str); if (oprator) { - Tokens token_clause(op_calsue.c_str(), op_calsue.c_str() + op_calsue.size()); + Tokens token_clause(op_calsue.data(), op_calsue.data() + op_calsue.size(), 0, true); IParser::Pos pos_clause(token_clause, pos.max_depth, pos.max_backtracks); if (!oprator->parse(pos_clause, node, expected)) 
return false; @@ -576,7 +576,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!node->as()->select()) { auto expr = String("*"); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!std::make_unique()->parse(new_pos, node, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 852ba50698d..98847cec2da 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -18,7 +18,7 @@ bool ParserKQLSort::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto expr = getExprFromToken(pos); - Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + Tokens tokens(expr.data(), expr.data() + expr.size(), 0, true); IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); auto pos_backup = new_pos; diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index e508b69bdff..9c3f35ff3dd 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -2,13 +2,13 @@ #include #include #include -#include #include #include #include #include #include + namespace DB { @@ -63,6 +63,8 @@ bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & exp bool ParserKQLTableFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + /// TODO: This code is idiotic, see https://github.com/ClickHouse/ClickHouse/issues/61742 + ParserToken lparen(TokenType::OpeningRoundBracket); ASTPtr string_literal; @@ -101,13 +103,16 @@ bool ParserKQLTableFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ++pos; } - Tokens token_kql(kql_statement.data(), kql_statement.data() + kql_statement.size()); - IParser::Pos pos_kql(token_kql, pos.max_depth, pos.max_backtracks); + Tokens tokens_kql(kql_statement.data(), 
kql_statement.data() + kql_statement.size(), 0, true); + IParser::Pos pos_kql(tokens_kql, pos.max_depth, pos.max_backtracks); + Expected kql_expected; kql_expected.enable_highlighting = false; if (!ParserKQLWithUnionQuery().parse(pos_kql, node, kql_expected)) return false; + ++pos; return true; } + } diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index fe9b9adfa2a..b1cd782d36b 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -45,7 +45,7 @@ protected: class ParserKQLTableFunction : public IParserBase { protected: - const char * getName() const override { return "KQL() function"; } + const char * getName() const override { return "KQL function"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 47d706d0b4b..c26115c22b8 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -194,7 +194,7 @@ bool ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected String converted_columns = getExprFromToken(expr_columns, pos.max_depth, pos.max_backtracks); - Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); + Tokens token_converted_columns(converted_columns.data(), converted_columns.data() + converted_columns.size(), 0, true); IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected)) @@ -206,7 +206,7 @@ bool ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected { String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth, pos.max_backtracks); - Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); 
+ Tokens token_converted_groupby(converted_groupby.data(), converted_groupby.data() + converted_groupby.size(), 0, true); IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 5f2bd50524c..b4601389696 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -59,9 +59,6 @@ Token quotedStringWithUnicodeQuotes(const char *& pos, const char * const token_ pos = find_first_symbols<'\xE2'>(pos, end); if (pos + 2 >= end) return Token(error_token, token_begin, end); - /// Empty identifiers are not allowed, while empty strings are. - if (success_token == TokenType::QuotedIdentifier && pos + 3 >= end) - return Token(error_token, token_begin, end); if (pos[0] == '\xE2' && pos[1] == '\x80' && pos[2] == expected_end_byte) { diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 6f48f79d942..28dbf781011 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -49,10 +49,11 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_clear_index(Keyword::CLEAR_INDEX); ParserKeyword s_materialize_index(Keyword::MATERIALIZE_INDEX); - ParserKeyword s_add_statistic(Keyword::ADD_STATISTIC); - ParserKeyword s_drop_statistic(Keyword::DROP_STATISTIC); - ParserKeyword s_clear_statistic(Keyword::CLEAR_STATISTIC); - ParserKeyword s_materialize_statistic(Keyword::MATERIALIZE_STATISTIC); + ParserKeyword s_add_statistics(Keyword::ADD_STATISTICS); + ParserKeyword s_drop_statistics(Keyword::DROP_STATISTICS); + ParserKeyword s_modify_statistics(Keyword::MODIFY_STATISTICS); + ParserKeyword s_clear_statistics(Keyword::CLEAR_STATISTICS); + ParserKeyword s_materialize_statistics(Keyword::MATERIALIZE_STATISTICS); ParserKeyword s_add_constraint(Keyword::ADD_CONSTRAINT); 
ParserKeyword s_drop_constraint(Keyword::DROP_CONSTRAINT); @@ -126,7 +127,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserIdentifier parser_remove_property; ParserCompoundColumnDeclaration parser_col_decl; ParserIndexDeclaration parser_idx_decl; - ParserStatisticDeclaration parser_stat_decl; + ParserStatisticsDeclaration parser_stat_decl; + ParserStatisticsDeclarationWithoutTypes parser_stat_decl_without_types; ParserConstraintDeclaration parser_constraint_decl; ParserProjectionDeclaration parser_projection_decl; ParserCompoundColumnDeclaration parser_modify_col_decl(false, false, true); @@ -154,7 +156,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ASTPtr command_constraint; ASTPtr command_projection_decl; ASTPtr command_projection; - ASTPtr command_statistic_decl; + ASTPtr command_statistics_decl; ASTPtr command_partition; ASTPtr command_predicate; ASTPtr command_update_assignments; @@ -368,36 +370,43 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; } } - else if (s_add_statistic.ignore(pos, expected)) + else if (s_add_statistics.ignore(pos, expected)) { if (s_if_not_exists.ignore(pos, expected)) command->if_not_exists = true; - if (!parser_stat_decl.parse(pos, command_statistic_decl, expected)) + if (!parser_stat_decl.parse(pos, command_statistics_decl, expected)) return false; - command->type = ASTAlterCommand::ADD_STATISTIC; + command->type = ASTAlterCommand::ADD_STATISTICS; } - else if (s_drop_statistic.ignore(pos, expected)) + else if (s_modify_statistics.ignore(pos, expected)) + { + if (!parser_stat_decl.parse(pos, command_statistics_decl, expected)) + return false; + + command->type = ASTAlterCommand::MODIFY_STATISTICS; + } + else if (s_drop_statistics.ignore(pos, expected)) { if (s_if_exists.ignore(pos, expected)) command->if_exists = true; - if (!parser_stat_decl.parse(pos, command_statistic_decl, expected)) + if 
(!parser_stat_decl_without_types.parse(pos, command_statistics_decl, expected)) return false; - command->type = ASTAlterCommand::DROP_STATISTIC; + command->type = ASTAlterCommand::DROP_STATISTICS; } - else if (s_clear_statistic.ignore(pos, expected)) + else if (s_clear_statistics.ignore(pos, expected)) { if (s_if_exists.ignore(pos, expected)) command->if_exists = true; - if (!parser_stat_decl.parse(pos, command_statistic_decl, expected)) + if (!parser_stat_decl_without_types.parse(pos, command_statistics_decl, expected)) return false; - command->type = ASTAlterCommand::DROP_STATISTIC; - command->clear_statistic = true; + command->type = ASTAlterCommand::DROP_STATISTICS; + command->clear_statistics = true; command->detach = false; if (s_in_partition.ignore(pos, expected)) @@ -406,15 +415,15 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; } } - else if (s_materialize_statistic.ignore(pos, expected)) + else if (s_materialize_statistics.ignore(pos, expected)) { if (s_if_exists.ignore(pos, expected)) command->if_exists = true; - if (!parser_stat_decl.parse(pos, command_statistic_decl, expected)) + if (!parser_stat_decl_without_types.parse(pos, command_statistics_decl, expected)) return false; - command->type = ASTAlterCommand::MATERIALIZE_STATISTIC; + command->type = ASTAlterCommand::MATERIALIZE_STATISTICS; command->detach = false; if (s_in_partition.ignore(pos, expected)) @@ -931,8 +940,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->projection_decl = command->children.emplace_back(std::move(command_projection_decl)).get(); if (command_projection) command->projection = command->children.emplace_back(std::move(command_projection)).get(); - if (command_statistic_decl) - command->statistic_decl = command->children.emplace_back(std::move(command_statistic_decl)).get(); + if (command_statistics_decl) + command->statistics_decl = 
command->children.emplace_back(std::move(command_statistics_decl)).get(); if (command_partition) command->partition = command->children.emplace_back(std::move(command_partition)).get(); if (command_predicate) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index c1b45871577..014dc7bd3bf 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -225,15 +225,15 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return true; } -bool ParserStatisticDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserStatisticsDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_type(Keyword::TYPE); ParserList columns_p(std::make_unique(), std::make_unique(TokenType::Comma), false); - ParserIdentifier type_p; + ParserList types_p(std::make_unique(), std::make_unique(TokenType::Comma), false); ASTPtr columns; - ASTPtr type; + ASTPtr types; if (!columns_p.parse(pos, columns, expected)) return false; @@ -241,12 +241,29 @@ bool ParserStatisticDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & if (!s_type.ignore(pos, expected)) return false; - if (!type_p.parse(pos, type, expected)) + if (!types_p.parse(pos, types, expected)) return false; - auto stat = std::make_shared(); + auto stat = std::make_shared(); + stat->set(stat->columns, columns); + stat->set(stat->types, types); + node = stat; + + return true; +} + +bool ParserStatisticsDeclarationWithoutTypes::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + + ParserList columns_p(std::make_unique(), std::make_unique(TokenType::Comma), false); + + ASTPtr columns; + + if (!columns_p.parse(pos, columns, expected)) + return false; + + auto stat = std::make_shared(); stat->set(stat->columns, columns); - stat->type = type->as().name(); node = stat; return true; diff --git 
a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index d001c097114..bb37491a366 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -138,7 +138,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_auto_increment{Keyword::AUTO_INCREMENT}; ParserKeyword s_comment{Keyword::COMMENT}; ParserKeyword s_codec{Keyword::CODEC}; - ParserKeyword s_stat{Keyword::STATISTIC}; + ParserKeyword s_stat{Keyword::STATISTICS}; ParserKeyword s_ttl{Keyword::TTL}; ParserKeyword s_remove{Keyword::REMOVE}; ParserKeyword s_modify_setting(Keyword::MODIFY_SETTING); @@ -155,7 +155,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserLiteral literal_parser; ParserCodec codec_parser; ParserCollation collation_parser; - ParserStatisticType stat_type_parser; + ParserStatisticsType stat_type_parser; ParserExpression expression_parser; ParserSetQuery settings_parser(true); @@ -193,7 +193,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr default_expression; ASTPtr comment_expression; ASTPtr codec_expression; - ASTPtr stat_type_expression; + ASTPtr statistics_desc_expression; ASTPtr ttl_expression; ASTPtr collation_expression; ASTPtr settings; @@ -213,6 +213,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E return res; }; + /// Keep this list of keywords in sync with ParserDataType::parseImpl(). 
if (!null_check_without_moving() && !s_default.checkWithoutMoving(pos, expected) && !s_materialized.checkWithoutMoving(pos, expected) @@ -325,7 +326,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (s_stat.ignore(pos, expected)) { - if (!stat_type_parser.parse(pos, stat_type_expression, expected)) + if (!stat_type_parser.parse(pos, statistics_desc_expression, expected)) return false; } @@ -398,10 +399,10 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->children.push_back(std::move(settings)); } - if (stat_type_expression) + if (statistics_desc_expression) { - column_declaration->stat_type = stat_type_expression; - column_declaration->children.push_back(std::move(stat_type_expression)); + column_declaration->statistics_desc = statistics_desc_expression; + column_declaration->children.push_back(std::move(statistics_desc_expression)); } if (ttl_expression) @@ -452,16 +453,27 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -class ParserStatisticDeclaration : public IParserBase +class ParserStatisticsDeclaration : public IParserBase { public: - ParserStatisticDeclaration() = default; + ParserStatisticsDeclaration() = default; protected: const char * getName() const override { return "statistics declaration"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserStatisticsDeclarationWithoutTypes : public IParserBase +{ +public: + ParserStatisticsDeclarationWithoutTypes() = default; + +protected: + const char * getName() const override { return "statistics declaration"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + class ParserConstraintDeclaration : public IParserBase { protected: diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index b5bc9f89990..ad33c7e4558 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -1,5 +1,6 @@ 
#include +#include #include #include #include @@ -103,12 +104,28 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; tryGetIdentifierNameInto(identifier, type_name); - /// Don't accept things like Array(`x.y`). + /// When parsing we accept quoted type names (e.g. `UInt64`), but when formatting we print them + /// unquoted (e.g. UInt64). This introduces problems when the string in the quotes is garbage: + /// * Array(`x.y`) -> Array(x.y) -> fails to parse + /// * `Null` -> Null -> parses as keyword instead of type name + /// Here we check for these cases and reject. if (!std::all_of(type_name.begin(), type_name.end(), [](char c) { return isWordCharASCII(c) || c == '$'; })) { expected.add(pos, "type name"); return false; } + /// Keywords that IParserColumnDeclaration recognizes before the type name. + /// E.g. reject CREATE TABLE a (x `Null`) because in "x Null" the Null would be parsed as + /// column attribute rather than type name. + { + String n = type_name; + boost::to_upper(n); + if (n == "NOT" || n == "NULL" || n == "DEFAULT" || n == "MATERIALIZED" || n == "EPHEMERAL" || n == "ALIAS" || n == "AUTO" || n == "PRIMARY" || n == "COMMENT" || n == "CODEC") + { + expected.add(pos, "type name"); + return false; + } + } String type_name_upper = Poco::toUpper(type_name); String type_name_suffix; diff --git a/src/Parsers/TokenIterator.h b/src/Parsers/TokenIterator.h index 207ddadb8bf..0d18ee5439e 100644 --- a/src/Parsers/TokenIterator.h +++ b/src/Parsers/TokenIterator.h @@ -21,6 +21,7 @@ class Tokens { private: std::vector data; + size_t max_pos = 0; Lexer lexer; bool skip_insignificant; @@ -35,10 +36,16 @@ public: while (true) { if (index < data.size()) + { + max_pos = std::max(max_pos, index); return data[index]; + } if (!data.empty() && data.back().isEnd()) + { + max_pos = data.size() - 1; return data.back(); + } Token token = lexer.nextToken(); @@ -51,7 +58,12 @@ public: { if (data.empty()) return (*this)[0]; - return 
data.back(); + return data[max_pos]; + } + + void reset() + { + max_pos = 0; } }; diff --git a/src/Parsers/formatAST.h b/src/Parsers/formatAST.h index dd72a59b4a2..e34902663dd 100644 --- a/src/Parsers/formatAST.h +++ b/src/Parsers/formatAST.h @@ -40,7 +40,7 @@ struct fmt::formatter } template - auto format(const DB::ASTPtr & ast, FormatContext & context) + auto format(const DB::ASTPtr & ast, FormatContext & context) const { return fmt::format_to(context.out(), "{}", DB::serializeAST(*ast)); } diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 41c51267496..fab5dac8f87 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -285,6 +286,33 @@ ASTPtr tryParseQuery( } Expected expected; + + /** A shortcut - if Lexer found invalid tokens, fail early without full parsing. + * But there are certain cases when invalid tokens are permitted: + * 1. INSERT queries can have arbitrary data after the FORMAT clause, that is parsed by a different parser. + * 2. It can also be the case when there are multiple queries separated by semicolons, and the first queries are ok + * while subsequent queries have syntax errors. + * + * This shortcut is needed to avoid complex backtracking in case of obviously erroneous queries. + */ + IParser::Pos lookahead(token_iterator); + if (!ParserKeyword(Keyword::INSERT_INTO).ignore(lookahead)) + { + while (lookahead->type != TokenType::Semicolon && lookahead->type != TokenType::EndOfStream) + { + if (lookahead->isError()) + { + out_error_message = getLexicalErrorMessage(query_begin, all_queries_end, *lookahead, hilite, query_description); + return nullptr; + } + + ++lookahead; + } + + /// We should not spoil the info about maximum parsed position in the original iterator. 
+ tokens.reset(); + } + ASTPtr res; const bool parse_res = parser.parse(token_iterator, res, expected); const auto last_token = token_iterator.max(); diff --git a/src/Planner/ActionsChain.cpp b/src/Planner/ActionsChain.cpp index c5438b5d2d4..1b594c5f2a1 100644 --- a/src/Planner/ActionsChain.cpp +++ b/src/Planner/ActionsChain.cpp @@ -11,7 +11,7 @@ namespace DB { -ActionsChainStep::ActionsChainStep(ActionsDAGPtr actions_, +ActionsChainStep::ActionsChainStep(ActionsAndProjectInputsFlagPtr actions_, bool use_actions_nodes_as_output_columns_, ColumnsWithTypeAndName additional_output_columns_) : actions(std::move(actions_)) @@ -28,12 +28,12 @@ void ActionsChainStep::finalizeInputAndOutputColumns(const NameSet & child_input auto child_input_columns_copy = child_input_columns; std::unordered_set output_nodes_names; - output_nodes_names.reserve(actions->getOutputs().size()); + output_nodes_names.reserve(actions->dag.getOutputs().size()); - for (auto & output_node : actions->getOutputs()) + for (auto & output_node : actions->dag.getOutputs()) output_nodes_names.insert(output_node->result_name); - for (const auto & node : actions->getNodes()) + for (const auto & node : actions->dag.getNodes()) { auto it = child_input_columns_copy.find(node.result_name); if (it == child_input_columns_copy.end()) @@ -45,20 +45,20 @@ void ActionsChainStep::finalizeInputAndOutputColumns(const NameSet & child_input if (output_nodes_names.contains(node.result_name)) continue; - actions->getOutputs().push_back(&node); + actions->dag.getOutputs().push_back(&node); output_nodes_names.insert(node.result_name); } - actions->removeUnusedActions(); + actions->dag.removeUnusedActions(); /// TODO: Analyzer fix ActionsDAG input and constant nodes with same name - actions->projectInput(); + actions->project_input = true; initialize(); } void ActionsChainStep::dump(WriteBuffer & buffer) const { buffer << "DAG" << '\n'; - buffer << actions->dumpDAG(); + buffer << actions->dag.dumpDAG(); if 
(!available_output_columns.empty()) { @@ -84,7 +84,7 @@ String ActionsChainStep::dump() const void ActionsChainStep::initialize() { - auto required_columns_names = actions->getRequiredColumnsNames(); + auto required_columns_names = actions->dag.getRequiredColumnsNames(); input_columns_names = NameSet(required_columns_names.begin(), required_columns_names.end()); available_output_columns.clear(); @@ -93,7 +93,7 @@ void ActionsChainStep::initialize() { std::unordered_set available_output_columns_names; - for (const auto & node : actions->getNodes()) + for (const auto & node : actions->dag.getNodes()) { if (available_output_columns_names.contains(node.result_name)) continue; diff --git a/src/Planner/ActionsChain.h b/src/Planner/ActionsChain.h index 4907fdbad87..3bce19786e6 100644 --- a/src/Planner/ActionsChain.h +++ b/src/Planner/ActionsChain.h @@ -48,18 +48,18 @@ public: * If use_actions_nodes_as_output_columns = true output columns are initialized using actions dag nodes. * If additional output columns are specified they are added to output columns. 
*/ - explicit ActionsChainStep(ActionsDAGPtr actions_, + explicit ActionsChainStep(ActionsAndProjectInputsFlagPtr actions_, bool use_actions_nodes_as_output_columns = true, ColumnsWithTypeAndName additional_output_columns_ = {}); /// Get actions - ActionsDAGPtr & getActions() + ActionsAndProjectInputsFlagPtr & getActions() { return actions; } /// Get actions - const ActionsDAGPtr & getActions() const + const ActionsAndProjectInputsFlagPtr & getActions() const { return actions; } @@ -98,7 +98,7 @@ public: private: void initialize(); - ActionsDAGPtr actions; + ActionsAndProjectInputsFlagPtr actions; bool use_actions_nodes_as_output_columns = true; diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp index 27b5909c13b..d5e39a9f123 100644 --- a/src/Planner/CollectTableExpressionData.cpp +++ b/src/Planner/CollectTableExpressionData.cpp @@ -90,7 +90,7 @@ public: ActionsDAGPtr alias_column_actions_dag = std::make_shared(); PlannerActionsVisitor actions_visitor(planner_context, false); - auto outputs = actions_visitor.visit(alias_column_actions_dag, column_node->getExpression()); + auto outputs = actions_visitor.visit(*alias_column_actions_dag, column_node->getExpression()); if (outputs.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single output in actions dag for alias column {}. Actual {}", column_node->dumpTree(), outputs.size()); @@ -340,7 +340,7 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr QueryTreeNodePtr query_tree_node = query_node_typed.getPrewhere(); PlannerActionsVisitor visitor(planner_context, false /*use_column_identifier_as_action_node_name*/); - auto expression_nodes = visitor.visit(prewhere_actions_dag, query_tree_node); + auto expression_nodes = visitor.visit(*prewhere_actions_dag, query_tree_node); if (expression_nodes.size() != 1) throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Invalid PREWHERE. Expected single boolean expression. 
In query {}", diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index b40e23a9553..2d42ed73223 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -166,7 +166,7 @@ FiltersForTableExpressionMap collectFiltersForAnalysis(const QueryTreeNodePtr & continue; const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage(); - if (typeid_cast(storage.get()) || typeid_cast(storage.get()) + if (typeid_cast(storage.get()) || (parallel_replicas_estimation_enabled && std::dynamic_pointer_cast(storage))) { collect_filters = true; @@ -329,12 +329,16 @@ public: }; void addExpressionStep(QueryPlan & query_plan, - const ActionsDAGPtr & expression_actions, + const ActionsAndProjectInputsFlagPtr & expression_actions, const std::string & step_description, std::vector & result_actions_to_execute) { - result_actions_to_execute.push_back(expression_actions); - auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), expression_actions); + auto actions = expression_actions->dag.clone(); + if (expression_actions->project_input) + actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + + result_actions_to_execute.push_back(actions); + auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), actions); expression_step->setStepDescription(step_description); query_plan.addStep(std::move(expression_step)); } @@ -344,9 +348,13 @@ void addFilterStep(QueryPlan & query_plan, const std::string & step_description, std::vector & result_actions_to_execute) { - result_actions_to_execute.push_back(filter_analysis_result.filter_actions); + auto actions = filter_analysis_result.filter_actions->dag.clone(); + if (filter_analysis_result.filter_actions->project_input) + actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + + result_actions_to_execute.push_back(actions); auto where_step = std::make_unique(query_plan.getCurrentDataStream(), - 
filter_analysis_result.filter_actions, + actions, filter_analysis_result.filter_column_name, filter_analysis_result.remove_filter_column); where_step->setStepDescription(step_description); @@ -545,14 +553,21 @@ void addTotalsHavingStep(QueryPlan & query_plan, const auto & having_analysis_result = expression_analysis_result.getHaving(); bool need_finalize = !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); + ActionsDAGPtr actions; if (having_analysis_result.filter_actions) - result_actions_to_execute.push_back(having_analysis_result.filter_actions); + { + actions = having_analysis_result.filter_actions->dag.clone(); + if (having_analysis_result.filter_actions->project_input) + actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + + result_actions_to_execute.push_back(actions); + } auto totals_having_step = std::make_unique( query_plan.getCurrentDataStream(), aggregation_analysis_result.aggregate_descriptions, query_analysis_result.aggregate_overflow_row, - having_analysis_result.filter_actions, + actions, having_analysis_result.filter_column_name, having_analysis_result.remove_filter_column, settings.totals_mode, @@ -728,12 +743,12 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, auto & interpolate_node_typed = interpolate_node->as(); PlannerActionsVisitor planner_actions_visitor(planner_context); - auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, + auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag, interpolate_node_typed.getExpression()); if (expression_to_interpolate_expression_nodes.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression to interpolate expected to have single action node"); - auto interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, + auto interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag, 
interpolate_node_typed.getInterpolateExpression()); if (interpolate_expression_nodes.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interpolate expression expected to have single action node"); diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 837307ba2ca..7a12d5d690d 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -413,11 +413,11 @@ private: class ActionsScopeNode { public: - explicit ActionsScopeNode(ActionsDAGPtr actions_dag_, QueryTreeNodePtr scope_node_) - : actions_dag(std::move(actions_dag_)) + explicit ActionsScopeNode(ActionsDAG & actions_dag_, QueryTreeNodePtr scope_node_) + : actions_dag(actions_dag_) , scope_node(std::move(scope_node_)) { - for (const auto & node : actions_dag->getNodes()) + for (const auto & node : actions_dag.getNodes()) node_name_to_node[node.result_name] = &node; } @@ -456,7 +456,7 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "No node with name {}. 
There are only nodes {}", node_name, - actions_dag->dumpNames()); + actions_dag.dumpNames()); return it->second; } @@ -467,7 +467,7 @@ public: if (it != node_name_to_node.end()) return it->second; - const auto * node = &actions_dag->addInput(node_name, column_type); + const auto * node = &actions_dag.addInput(node_name, column_type); node_name_to_node[node->result_name] = node; return node; @@ -479,7 +479,7 @@ public: if (it != node_name_to_node.end()) return it->second; - const auto * node = &actions_dag->addInput(column); + const auto * node = &actions_dag.addInput(column); node_name_to_node[node->result_name] = node; return node; @@ -491,7 +491,7 @@ public: if (it != node_name_to_node.end()) return it->second; - const auto * node = &actions_dag->addColumn(column); + const auto * node = &actions_dag.addColumn(column); node_name_to_node[node->result_name] = node; return node; @@ -504,7 +504,7 @@ public: if (it != node_name_to_node.end()) return it->second; - const auto * node = &actions_dag->addFunction(function, children, node_name); + const auto * node = &actions_dag.addFunction(function, children, node_name); node_name_to_node[node->result_name] = node; return node; @@ -516,7 +516,7 @@ public: if (it != node_name_to_node.end()) return it->second; - const auto * node = &actions_dag->addArrayJoin(*child, node_name); + const auto * node = &actions_dag.addArrayJoin(*child, node_name); node_name_to_node[node->result_name] = node; return node; @@ -524,14 +524,14 @@ public: private: std::unordered_map node_name_to_node; - ActionsDAGPtr actions_dag; + ActionsDAG & actions_dag; QueryTreeNodePtr scope_node; }; class PlannerActionsVisitorImpl { public: - PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, + PlannerActionsVisitorImpl(ActionsDAG & actions_dag, const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_); @@ -595,14 +595,14 @@ private: bool use_column_identifier_as_action_node_name; }; 
-PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, +PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAG & actions_dag, const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_) : planner_context(planner_context_) , action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_) , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) { - actions_stack.emplace_back(std::move(actions_dag), nullptr); + actions_stack.emplace_back(actions_dag, nullptr); } ActionsDAG::NodeRawConstPtrs PlannerActionsVisitorImpl::visit(QueryTreeNodePtr expression_node) @@ -758,7 +758,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi } auto lambda_actions_dag = std::make_shared(); - actions_stack.emplace_back(lambda_actions_dag, node); + actions_stack.emplace_back(*lambda_actions_dag, node); auto [lambda_expression_node_name, levels] = visitImpl(lambda_node.getExpression()); lambda_actions_dag->getOutputs().push_back(actions_stack.back().getNodeOrThrow(lambda_expression_node_name)); @@ -886,7 +886,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi for (const auto & argument : function_node.getArguments()) { - auto index_hint_argument_expression_dag_nodes = actions_visitor.visit(index_hint_actions_dag, argument); + auto index_hint_argument_expression_dag_nodes = actions_visitor.visit(*index_hint_actions_dag, argument); for (auto & expression_dag_node : index_hint_argument_expression_dag_nodes) { @@ -1013,7 +1013,7 @@ PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_c , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) {} -ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAGPtr actions_dag, QueryTreeNodePtr expression_node) +ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & 
actions_dag, QueryTreeNodePtr expression_node) { PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context, use_column_identifier_as_action_node_name); return actions_visitor_impl.visit(expression_node); diff --git a/src/Planner/PlannerActionsVisitor.h b/src/Planner/PlannerActionsVisitor.h index 8506c309171..6bb32047327 100644 --- a/src/Planner/PlannerActionsVisitor.h +++ b/src/Planner/PlannerActionsVisitor.h @@ -37,7 +37,7 @@ public: * Necessary actions are not added in actions dag output. * Returns query tree expression node actions dag nodes. */ - ActionsDAG::NodeRawConstPtrs visit(ActionsDAGPtr actions_dag, QueryTreeNodePtr expression_node); + ActionsDAG::NodeRawConstPtrs visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node); private: const PlannerContextPtr planner_context; diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index f0a2845c3e8..ceb506d1bbb 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include @@ -37,15 +38,21 @@ namespace * Actions before filter are added into into actions chain. * It is client responsibility to update filter analysis result if filter column must be removed after chain is finalized. 
*/ -FilterAnalysisResult analyzeFilter(const QueryTreeNodePtr & filter_expression_node, +std::optional analyzeFilter(const QueryTreeNodePtr & filter_expression_node, const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { FilterAnalysisResult result; - result.filter_actions = buildActionsDAGFromExpressionNode(filter_expression_node, input_columns, planner_context); - result.filter_column_name = result.filter_actions->getOutputs().at(0)->result_name; + result.filter_actions = std::make_shared(); + result.filter_actions->dag = buildActionsDAGFromExpressionNode(filter_expression_node, input_columns, planner_context); + + const auto * output = result.filter_actions->dag.getOutputs().at(0); + if (output->column && ConstantFilterDescription(*output->column).always_true) + return {}; + + result.filter_column_name = output->result_name; actions_chain.addStep(std::make_unique(result.filter_actions)); return result; @@ -111,8 +118,9 @@ std::optional analyzeAggregation(const QueryTreeNodeP Names aggregation_keys; - ActionsDAGPtr before_aggregation_actions = std::make_shared(input_columns); - before_aggregation_actions->getOutputs().clear(); + ActionsAndProjectInputsFlagPtr before_aggregation_actions = std::make_shared(); + before_aggregation_actions->dag = ActionsDAG(input_columns); + before_aggregation_actions->dag.getOutputs().clear(); std::unordered_set before_aggregation_actions_output_node_names; @@ -147,7 +155,7 @@ std::optional analyzeAggregation(const QueryTreeNodeP if (constant_key && !aggregates_descriptions.empty() && (!check_constants_for_group_by_key || canRemoveConstantFromGroupByKey(*constant_key))) continue; - auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, grouping_set_key_node); + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions->dag, grouping_set_key_node); aggregation_keys.reserve(expression_dag_nodes.size()); for (auto & 
expression_dag_node : expression_dag_nodes) @@ -160,7 +168,7 @@ std::optional analyzeAggregation(const QueryTreeNodeP auto column_after_aggregation = group_by_use_nulls && expression_dag_node->column != nullptr ? makeNullableSafe(expression_dag_node->column) : expression_dag_node->column; available_columns_after_aggregation.emplace_back(std::move(column_after_aggregation), expression_type_after_aggregation, expression_dag_node->result_name); aggregation_keys.push_back(expression_dag_node->result_name); - before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions->dag.getOutputs().push_back(expression_dag_node); before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); } } @@ -199,7 +207,7 @@ std::optional analyzeAggregation(const QueryTreeNodeP if (constant_key && !aggregates_descriptions.empty() && (!check_constants_for_group_by_key || canRemoveConstantFromGroupByKey(*constant_key))) continue; - auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, group_by_key_node); + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions->dag, group_by_key_node); aggregation_keys.reserve(expression_dag_nodes.size()); for (auto & expression_dag_node : expression_dag_nodes) @@ -211,7 +219,7 @@ std::optional analyzeAggregation(const QueryTreeNodeP auto column_after_aggregation = group_by_use_nulls && expression_dag_node->column != nullptr ? 
makeNullableSafe(expression_dag_node->column) : expression_dag_node->column; available_columns_after_aggregation.emplace_back(std::move(column_after_aggregation), expression_type_after_aggregation, expression_dag_node->result_name); aggregation_keys.push_back(expression_dag_node->result_name); - before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions->dag.getOutputs().push_back(expression_dag_node); before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); } } @@ -225,13 +233,13 @@ std::optional analyzeAggregation(const QueryTreeNodeP auto & aggregate_function_node_typed = aggregate_function_node->as(); for (const auto & aggregate_function_node_argument : aggregate_function_node_typed.getArguments().getNodes()) { - auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, aggregate_function_node_argument); + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions->dag, aggregate_function_node_argument); for (auto & expression_dag_node : expression_dag_nodes) { if (before_aggregation_actions_output_node_names.contains(expression_dag_node->result_name)) continue; - before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions->dag.getOutputs().push_back(expression_dag_node); before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); } } @@ -278,8 +286,9 @@ std::optional analyzeWindow(const QueryTreeNodePtr & query PlannerActionsVisitor actions_visitor(planner_context); - ActionsDAGPtr before_window_actions = std::make_shared(input_columns); - before_window_actions->getOutputs().clear(); + ActionsAndProjectInputsFlagPtr before_window_actions = std::make_shared(); + before_window_actions->dag = ActionsDAG(input_columns); + before_window_actions->dag.getOutputs().clear(); std::unordered_set before_window_actions_output_node_names; @@ -288,25 +297,25 @@ std::optional analyzeWindow(const 
QueryTreeNodePtr & query auto & window_function_node_typed = window_function_node->as(); auto & window_node = window_function_node_typed.getWindowNode()->as(); - auto expression_dag_nodes = actions_visitor.visit(before_window_actions, window_function_node_typed.getArgumentsNode()); + auto expression_dag_nodes = actions_visitor.visit(before_window_actions->dag, window_function_node_typed.getArgumentsNode()); for (auto & expression_dag_node : expression_dag_nodes) { if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) continue; - before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions->dag.getOutputs().push_back(expression_dag_node); before_window_actions_output_node_names.insert(expression_dag_node->result_name); } - expression_dag_nodes = actions_visitor.visit(before_window_actions, window_node.getPartitionByNode()); + expression_dag_nodes = actions_visitor.visit(before_window_actions->dag, window_node.getPartitionByNode()); for (auto & expression_dag_node : expression_dag_nodes) { if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) continue; - before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions->dag.getOutputs().push_back(expression_dag_node); before_window_actions_output_node_names.insert(expression_dag_node->result_name); } @@ -317,14 +326,14 @@ std::optional analyzeWindow(const QueryTreeNodePtr & query for (auto & sort_node : order_by_node_list.getNodes()) { auto & sort_node_typed = sort_node->as(); - expression_dag_nodes = actions_visitor.visit(before_window_actions, sort_node_typed.getExpression()); + expression_dag_nodes = actions_visitor.visit(before_window_actions->dag, sort_node_typed.getExpression()); for (auto & expression_dag_node : expression_dag_nodes) { if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) continue; - 
before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions->dag.getOutputs().push_back(expression_dag_node); before_window_actions_output_node_names.insert(expression_dag_node->result_name); } } @@ -357,7 +366,8 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { - auto projection_actions = buildActionsDAGFromExpressionNode(query_node.getProjectionNode(), input_columns, planner_context); + auto projection_actions = std::make_shared(); + projection_actions->dag = buildActionsDAGFromExpressionNode(query_node.getProjectionNode(), input_columns, planner_context); auto projection_columns = query_node.getProjectionColumns(); size_t projection_columns_size = projection_columns.size(); @@ -366,7 +376,7 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, NamesWithAliases projection_column_names_with_display_aliases; projection_column_names_with_display_aliases.reserve(projection_columns_size); - auto & projection_actions_outputs = projection_actions->getOutputs(); + auto & projection_actions_outputs = projection_actions->dag.getOutputs(); size_t projection_outputs_size = projection_actions_outputs.size(); if (projection_columns_size != projection_outputs_size) @@ -404,8 +414,9 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { - ActionsDAGPtr before_sort_actions = std::make_shared(input_columns); - auto & before_sort_actions_outputs = before_sort_actions->getOutputs(); + auto before_sort_actions = std::make_shared(); + before_sort_actions->dag = ActionsDAG(input_columns); + auto & before_sort_actions_outputs = before_sort_actions->dag.getOutputs(); before_sort_actions_outputs.clear(); PlannerActionsVisitor actions_visitor(planner_context); @@ -419,7 +430,7 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, for (const auto 
& sort_node : order_by_node_list.getNodes()) { auto & sort_node_typed = sort_node->as(); - auto expression_dag_nodes = actions_visitor.visit(before_sort_actions, sort_node_typed.getExpression()); + auto expression_dag_nodes = actions_visitor.visit(before_sort_actions->dag, sort_node_typed.getExpression()); has_with_fill |= sort_node_typed.withFill(); for (auto & action_dag_node : expression_dag_nodes) @@ -435,7 +446,7 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, if (has_with_fill) { for (auto & output_node : before_sort_actions_outputs) - output_node = &before_sort_actions->materializeNode(*output_node); + output_node = &before_sort_actions->dag.materializeNode(*output_node); } /// We add only INPUT columns necessary for INTERPOLATE expression in before ORDER BY actions DAG @@ -444,7 +455,7 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, auto & interpolate_list_node = query_node.getInterpolate()->as(); PlannerActionsVisitor interpolate_actions_visitor(planner_context); - auto interpolate_actions_dag = std::make_shared(); + ActionsDAG interpolate_actions_dag; for (auto & interpolate_node : interpolate_list_node.getNodes()) { @@ -453,10 +464,10 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, } std::unordered_map before_sort_actions_inputs_name_to_node; - for (const auto & node : before_sort_actions->getInputs()) + for (const auto & node : before_sort_actions->dag.getInputs()) before_sort_actions_inputs_name_to_node.emplace(node->result_name, node); - for (const auto & node : interpolate_actions_dag->getNodes()) + for (const auto & node : interpolate_actions_dag.getNodes()) { if (before_sort_actions_dag_output_node_names.contains(node.result_name) || node.type != ActionsDAG::ActionType::INPUT) @@ -466,7 +477,7 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, if (input_node_it == before_sort_actions_inputs_name_to_node.end()) { auto input_column = ColumnWithTypeAndName{node.column, node.result_type, 
node.result_name}; - const auto * input_node = &before_sort_actions->addInput(std::move(input_column)); + const auto * input_node = &before_sort_actions->dag.addInput(std::move(input_column)); auto [it, _] = before_sort_actions_inputs_name_to_node.emplace(node.result_name, input_node); input_node_it = it; } @@ -491,22 +502,23 @@ LimitByAnalysisResult analyzeLimitBy(const QueryNode & query_node, const NameSet & required_output_nodes_names, ActionsChain & actions_chain) { - auto before_limit_by_actions = buildActionsDAGFromExpressionNode(query_node.getLimitByNode(), input_columns, planner_context); + auto before_limit_by_actions = std::make_shared(); + before_limit_by_actions->dag = buildActionsDAGFromExpressionNode(query_node.getLimitByNode(), input_columns, planner_context); NameSet limit_by_column_names_set; Names limit_by_column_names; - limit_by_column_names.reserve(before_limit_by_actions->getOutputs().size()); - for (auto & output_node : before_limit_by_actions->getOutputs()) + limit_by_column_names.reserve(before_limit_by_actions->dag.getOutputs().size()); + for (auto & output_node : before_limit_by_actions->dag.getOutputs()) { limit_by_column_names_set.insert(output_node->result_name); limit_by_column_names.push_back(output_node->result_name); } - for (const auto & node : before_limit_by_actions->getNodes()) + for (const auto & node : before_limit_by_actions->dag.getNodes()) { if (required_output_nodes_names.contains(node.result_name) && !limit_by_column_names_set.contains(node.result_name)) - before_limit_by_actions->getOutputs().push_back(&node); + before_limit_by_actions->dag.getOutputs().push_back(&node); } auto actions_step_before_limit_by = std::make_unique(before_limit_by_actions); @@ -534,8 +546,11 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo if (query_node.hasWhere()) { where_analysis_result_optional = analyzeFilter(query_node.getWhere(), current_output_columns, planner_context, actions_chain); - 
where_action_step_index_optional = actions_chain.getLastStepIndex(); - current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); + if (where_analysis_result_optional) + { + where_action_step_index_optional = actions_chain.getLastStepIndex(); + current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); + } } auto aggregation_analysis_result_optional = analyzeAggregation(query_tree, current_output_columns, planner_context, actions_chain); @@ -548,8 +563,11 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo if (query_node.hasHaving()) { having_analysis_result_optional = analyzeFilter(query_node.getHaving(), current_output_columns, planner_context, actions_chain); - having_action_step_index_optional = actions_chain.getLastStepIndex(); - current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); + if (having_analysis_result_optional) + { + having_action_step_index_optional = actions_chain.getLastStepIndex(); + current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); + } } auto window_analysis_result_optional = analyzeWindow(query_tree, current_output_columns, planner_context, actions_chain); @@ -562,8 +580,11 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo if (query_node.hasQualify()) { qualify_analysis_result_optional = analyzeFilter(query_node.getQualify(), current_output_columns, planner_context, actions_chain); - qualify_action_step_index_optional = actions_chain.getLastStepIndex(); - current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); + if (qualify_analysis_result_optional) + { + qualify_action_step_index_optional = actions_chain.getLastStepIndex(); + current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); + } } auto projection_analysis_result = analyzeProjection(query_node, current_output_columns, planner_context, actions_chain); @@ -591,7 +612,7 @@ PlannerExpressionsAnalysisResult 
buildExpressionAnalysisResult(const QueryTreeNo if (sort_analysis_result_optional.has_value() && planner_query_processing_info.isFirstStage() && planner_query_processing_info.getToStage() != QueryProcessingStage::Complete) { const auto & before_order_by_actions = sort_analysis_result_optional->before_order_by_actions; - for (const auto & output_node : before_order_by_actions->getOutputs()) + for (const auto & output_node : before_order_by_actions->dag.getOutputs()) required_output_nodes_names.insert(output_node->result_name); } @@ -647,8 +668,10 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo } } - auto project_names_actions = std::make_shared(project_names_input); - project_names_actions->project(projection_analysis_result.projection_column_names_with_display_aliases); + auto project_names_actions = std::make_shared(); + project_names_actions->dag = ActionsDAG(project_names_input); + project_names_actions->dag.project(projection_analysis_result.projection_column_names_with_display_aliases); + project_names_actions->project_input = true; actions_chain.addStep(std::make_unique(project_names_actions)); actions_chain.finalize(); diff --git a/src/Planner/PlannerExpressionAnalysis.h b/src/Planner/PlannerExpressionAnalysis.h index 0773272e49a..820df7131a7 100644 --- a/src/Planner/PlannerExpressionAnalysis.h +++ b/src/Planner/PlannerExpressionAnalysis.h @@ -17,22 +17,22 @@ namespace DB struct ProjectionAnalysisResult { - ActionsDAGPtr projection_actions; + ActionsAndProjectInputsFlagPtr projection_actions; Names projection_column_names; NamesWithAliases projection_column_names_with_display_aliases; - ActionsDAGPtr project_names_actions; + ActionsAndProjectInputsFlagPtr project_names_actions; }; struct FilterAnalysisResult { - ActionsDAGPtr filter_actions; + ActionsAndProjectInputsFlagPtr filter_actions; std::string filter_column_name; bool remove_filter_column = false; }; struct AggregationAnalysisResult { - ActionsDAGPtr 
before_aggregation_actions; + ActionsAndProjectInputsFlagPtr before_aggregation_actions; Names aggregation_keys; AggregateDescriptions aggregate_descriptions; GroupingSetsParamsList grouping_sets_parameters_list; @@ -41,19 +41,19 @@ struct AggregationAnalysisResult struct WindowAnalysisResult { - ActionsDAGPtr before_window_actions; + ActionsAndProjectInputsFlagPtr before_window_actions; std::vector window_descriptions; }; struct SortAnalysisResult { - ActionsDAGPtr before_order_by_actions; + ActionsAndProjectInputsFlagPtr before_order_by_actions; bool has_with_fill = false; }; struct LimitByAnalysisResult { - ActionsDAGPtr before_limit_by_actions; + ActionsAndProjectInputsFlagPtr before_limit_by_actions; Names limit_by_column_names; }; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 1b2a55a50b0..d26092d57cb 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -52,10 +53,11 @@ #include #include #include -#include +#include #include #include #include +#include #include #include @@ -498,12 +500,14 @@ FilterDAGInfo buildCustomKeyFilterIfNeeded(const StoragePtr & storage, LOG_TRACE(getLogger("Planner"), "Processing query on a replica using custom_key '{}'", settings.parallel_replicas_custom_key.value); auto parallel_replicas_custom_filter_ast = getCustomKeyFilterForParallelReplica( - settings.parallel_replicas_count, - settings.parallel_replica_offset, - std::move(custom_key_ast), - settings.parallel_replicas_custom_key_filter_type, - storage->getInMemoryMetadataPtr()->columns, - query_context); + settings.parallel_replicas_count, + settings.parallel_replica_offset, + std::move(custom_key_ast), + {settings.parallel_replicas_custom_key_filter_type, + settings.parallel_replicas_custom_key_range_lower, + settings.parallel_replicas_custom_key_range_upper}, + storage->getInMemoryMetadataPtr()->columns, + query_context); return 
buildFilterInfo(parallel_replicas_custom_filter_ast, table_expression_query_info.table_expression, planner_context); } @@ -643,7 +647,6 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto table_expression_query_info = select_query_info; table_expression_query_info.table_expression = table_expression; table_expression_query_info.filter_actions_dag = table_expression_data.getFilterActions(); - table_expression_query_info.optimized_prewhere_info = table_expression_data.getPrewhereInfo(); table_expression_query_info.analyzer_can_use_parallel_replicas_on_follower = table_node == planner_context->getGlobalPlannerContext()->parallel_replicas_table; size_t max_streams = settings.max_threads; @@ -690,14 +693,14 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (select_query_info.local_storage_limits.local_limits.size_limits.max_rows != 0) { if (max_block_size_limited < select_query_info.local_storage_limits.local_limits.size_limits.max_rows) - table_expression_query_info.limit = max_block_size_limited; + table_expression_query_info.trivial_limit = max_block_size_limited; /// Ask to read just enough rows to make the max_rows limit effective (so it has a chance to be triggered). 
else if (select_query_info.local_storage_limits.local_limits.size_limits.max_rows < std::numeric_limits::max()) - table_expression_query_info.limit = 1 + select_query_info.local_storage_limits.local_limits.size_limits.max_rows; + table_expression_query_info.trivial_limit = 1 + select_query_info.local_storage_limits.local_limits.size_limits.max_rows; } else { - table_expression_query_info.limit = max_block_size_limited; + table_expression_query_info.trivial_limit = max_block_size_limited; } } @@ -767,37 +770,6 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres { if (!select_query_options.only_analyze) { - auto storage_merge_tree = std::dynamic_pointer_cast(storage); - if (storage_merge_tree && query_context->canUseParallelReplicasOnInitiator() - && settings.parallel_replicas_min_number_of_rows_per_replica > 0) - { - UInt64 rows_to_read - = storage_merge_tree->estimateNumberOfRowsToRead(query_context, storage_snapshot, table_expression_query_info); - - if (max_block_size_limited && (max_block_size_limited < rows_to_read)) - rows_to_read = max_block_size_limited; - - size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica; - LOG_TRACE( - getLogger("Planner"), - "Estimated {} rows to read. 
It is enough work for {} parallel replicas", - rows_to_read, - number_of_replicas_to_use); - - if (number_of_replicas_to_use <= 1) - { - planner_context->getMutableQueryContext()->setSetting( - "allow_experimental_parallel_reading_from_replicas", Field(0)); - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); - LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); - } - else if (number_of_replicas_to_use < settings.max_parallel_replicas) - { - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use); - LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use); - } - } - auto & prewhere_info = table_expression_query_info.prewhere_info; const auto & prewhere_actions = table_expression_data.getPrewhereFilterActions(); @@ -895,6 +867,96 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres max_block_size, max_streams); + auto parallel_replicas_enabled_for_storage = [](const StoragePtr & table, const Settings & query_settings) + { + if (!table->isMergeTree()) + return false; + + if (!table->supportsReplication() && !query_settings.parallel_replicas_for_non_replicated_merge_tree) + return false; + + return true; + }; + + /// query_plan can be empty if there is nothing to read + if (query_plan.isInitialized() && parallel_replicas_enabled_for_storage(storage, settings) && query_context->canUseParallelReplicasOnInitiator()) + { + // (1) find read step + QueryPlan::Node * node = query_plan.getRootNode(); + ReadFromMergeTree * reading = nullptr; + while (node) + { + reading = typeid_cast(node->step.get()); + if (reading) + break; + + QueryPlan::Node * prev_node = node; + if (!node->children.empty()) + { + chassert(node->children.size() == 1); + node = node->children.at(0); + } + else + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Step is expected to be 
ReadFromMergeTree but it's {}", + prev_node->step->getName()); + } + } + + chassert(reading); + + // (2) if it's ReadFromMergeTree - run index analysis and check number of rows to read + if (settings.parallel_replicas_min_number_of_rows_per_replica > 0) + { + auto result_ptr = reading->selectRangesToRead(); + + UInt64 rows_to_read = result_ptr->selected_rows; + if (table_expression_query_info.trivial_limit > 0 && table_expression_query_info.trivial_limit < rows_to_read) + rows_to_read = table_expression_query_info.trivial_limit; + + if (max_block_size_limited && (max_block_size_limited < rows_to_read)) + rows_to_read = max_block_size_limited; + + const size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica; + LOG_TRACE( + getLogger("Planner"), + "Estimated {} rows to read. It is enough work for {} parallel replicas", + rows_to_read, + number_of_replicas_to_use); + + if (number_of_replicas_to_use <= 1) + { + planner_context->getMutableQueryContext()->setSetting( + "allow_experimental_parallel_reading_from_replicas", Field(0)); + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); + LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); + } + else if (number_of_replicas_to_use < settings.max_parallel_replicas) + { + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use); + LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use); + } + } + + // (3) if parallel replicas still enabled - replace reading step + if (planner_context->getQueryContext()->canUseParallelReplicasOnInitiator()) + { + from_stage = QueryProcessingStage::WithMergeableState; + QueryPlan query_plan_parallel_replicas; + ClusterProxy::executeQueryWithParallelReplicas( + query_plan_parallel_replicas, + storage->getStorageID(), + from_stage, + 
table_expression_query_info.query_tree, + table_expression_query_info.planner_context, + query_context, + table_expression_query_info.storage_limits); + query_plan = std::move(query_plan_parallel_replicas); + } + } + const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); if (!alias_column_expressions.empty() && query_plan.isInitialized() && from_stage == QueryProcessingStage::FetchColumns) { @@ -1070,7 +1132,7 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP } } - cast_actions_dag->projectInput(); + cast_actions_dag->appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header); auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); cast_join_columns_step->setStepDescription("Cast JOIN columns to Nullable"); plan_to_add_cast.addStep(std::move(cast_join_columns_step)); @@ -1116,12 +1178,12 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ join_table_expression, planner_context); - join_clauses_and_actions.left_join_expressions_actions->projectInput(); + join_clauses_and_actions.left_join_expressions_actions->appendInputsForUnusedColumns(left_plan.getCurrentDataStream().header); auto left_join_expressions_actions_step = std::make_unique(left_plan.getCurrentDataStream(), join_clauses_and_actions.left_join_expressions_actions); left_join_expressions_actions_step->setStepDescription("JOIN actions"); left_plan.addStep(std::move(left_join_expressions_actions_step)); - join_clauses_and_actions.right_join_expressions_actions->projectInput(); + join_clauses_and_actions.right_join_expressions_actions->appendInputsForUnusedColumns(right_plan.getCurrentDataStream().header); auto right_join_expressions_actions_step = std::make_unique(right_plan.getCurrentDataStream(), join_clauses_and_actions.right_join_expressions_actions); right_join_expressions_actions_step->setStepDescription("JOIN actions"); 
right_plan.addStep(std::move(right_join_expressions_actions_step)); @@ -1173,7 +1235,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ output_node = &cast_actions_dag->addCast(*output_node, cast_type, output_node->result_name); } - cast_actions_dag->projectInput(); + cast_actions_dag->appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header); auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); cast_join_columns_step->setStepDescription("Cast JOIN USING columns"); @@ -1526,6 +1588,8 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ left_join_tree_query_plan.actions_dags.emplace_back(std::move(join_clauses_and_actions.left_join_expressions_actions)); if (join_clauses_and_actions.right_join_expressions_actions) left_join_tree_query_plan.actions_dags.emplace_back(std::move(join_clauses_and_actions.right_join_expressions_actions)); + if (join_clauses_and_actions.mixed_join_expressions_actions) + left_join_tree_query_plan.actions_dags.push_back(join_clauses_and_actions.mixed_join_expressions_actions); auto mapping = std::move(left_join_tree_query_plan.query_node_to_plan_step_mapping); auto & r_mapping = right_join_tree_query_plan.query_node_to_plan_step_mapping; @@ -1566,7 +1630,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ array_join_column_names.insert(array_join_column_identifier); auto & array_join_expression_column = array_join_expression->as(); - auto expression_dag_index_nodes = actions_visitor.visit(array_join_action_dag, array_join_expression_column.getExpressionOrThrow()); + auto expression_dag_index_nodes = actions_visitor.visit(*array_join_action_dag, array_join_expression_column.getExpressionOrThrow()); for (auto & expression_dag_index_node : expression_dag_index_nodes) { @@ -1576,7 +1640,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & 
array_ } } - array_join_action_dag->projectInput(); + array_join_action_dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); join_tree_query_plan.actions_dags.push_back(array_join_action_dag); diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index c410b04f209..e752c57b08b 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include #include @@ -183,7 +183,7 @@ const ActionsDAG::Node * appendExpression( const JoinNode & join_node) { PlannerActionsVisitor join_expression_visitor(planner_context); - auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(dag, expression); + auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(*dag, expression); if (join_expression_dag_node_raw_pointers.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "JOIN {} ON clause contains multiple expressions", @@ -603,7 +603,7 @@ JoinClausesAndActions buildJoinClausesAndActions( { auto mixed_join_expressions_actions = std::make_shared(mixed_table_expression_columns); PlannerActionsVisitor join_expression_visitor(planner_context); - auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(mixed_join_expressions_actions, join_expression); + auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(*mixed_join_expressions_actions, join_expression); if (join_expression_dag_node_raw_pointers.size() != 1) throw Exception( ErrorCodes::LOGICAL_ERROR, "JOIN {} ON clause contains multiple expressions", join_node.formatASTForErrorMessage()); @@ -802,13 +802,12 @@ static std::shared_ptr tryCreateJoin(JoinAlgorithm algorithm, algorithm == JoinAlgorithm::PARALLEL_HASH || algorithm == JoinAlgorithm::DEFAULT) { - if (table_join->allowParallelHashJoin()) - { - auto query_context = planner_context->getQueryContext(); - return std::make_shared(query_context, table_join, 
query_context->getSettings().max_threads, right_table_expression_header); - } + auto query_context = planner_context->getQueryContext(); - return std::make_shared(table_join, right_table_expression_header); + if (table_join->allowParallelHashJoin()) + return std::make_shared(query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header); + + return std::make_shared(table_join, right_table_expression_header, query_context->getSettingsRef().join_any_take_last_row); } if (algorithm == JoinAlgorithm::FULL_SORTING_MERGE) diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 4a74bf413d3..18a6d297838 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -213,14 +213,14 @@ StorageLimits buildStorageLimits(const Context & context, const SelectQueryOptio return {limits, leaf_limits}; } -ActionsDAGPtr buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, +ActionsDAG buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context) { - ActionsDAGPtr action_dag = std::make_shared(input_columns); + ActionsDAG action_dag(input_columns); PlannerActionsVisitor actions_visitor(planner_context); auto expression_dag_index_nodes = actions_visitor.visit(action_dag, expression_node); - action_dag->getOutputs() = std::move(expression_dag_index_nodes); + action_dag.getOutputs() = std::move(expression_dag_index_nodes); return action_dag; } @@ -443,7 +443,7 @@ FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, auto filter_actions_dag = std::make_shared(); PlannerActionsVisitor actions_visitor(planner_context, false /*use_column_identifier_as_action_node_name*/); - auto expression_nodes = actions_visitor.visit(filter_actions_dag, filter_query_tree); + auto expression_nodes = actions_visitor.visit(*filter_actions_dag, filter_query_tree); if (expression_nodes.size() != 1) throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Filter actions must return single output node. Actual {}", diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h index 4706f552c9d..3172847f053 100644 --- a/src/Planner/Utils.h +++ b/src/Planner/Utils.h @@ -47,7 +47,7 @@ StorageLimits buildStorageLimits(const Context & context, const SelectQueryOptio * Inputs are not used for actions dag outputs. * Only root query tree expression node is used as actions dag output. */ -ActionsDAGPtr buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, +ActionsDAG buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context); diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 2631f665f9c..5f6cf2f7230 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -125,7 +125,7 @@ void Chunk::addColumn(size_t position, ColumnPtr column) if (position >= columns.size()) throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Chunk::addColumn(), max position = {}", - position, columns.size() - 1); + position, !columns.empty() ? columns.size() - 1 : 0); if (empty()) num_rows = column->size(); else if (column->size() != num_rows) @@ -143,7 +143,7 @@ void Chunk::erase(size_t position) if (position >= columns.size()) throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Chunk::erase(), max position = {}", - toString(position), toString(columns.size() - 1)); + toString(position), toString(!columns.empty() ? 
columns.size() - 1 : 0)); columns.erase(columns.begin() + position); } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 8855a1bc28d..be4e9430c34 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -46,6 +46,15 @@ JSONEachRowRowInputFormat::JSONEachRowRowInputFormat( { const auto & header = getPort().getHeader(); name_map = header.getNamesToIndexesMap(); + if (format_settings_.json.ignore_key_case) + { + for (auto & it : name_map) + { + StringRef key = it.first; + String lower_case_key = transformFieldNameToLowerCase(key); + lower_case_name_map[lower_case_key] = key; + } + } if (format_settings_.import_nested_json) { for (size_t i = 0; i != header.columns(); ++i) @@ -171,7 +180,15 @@ void JSONEachRowRowInputFormat::readJSONObject(MutableColumns & columns) skipUnknownField(name_ref); continue; } - const size_t column_index = columnIndex(name_ref, key_index); + size_t column_index = 0; + if (format_settings.json.ignore_key_case) + { + String lower_case_name = transformFieldNameToLowerCase(name_ref); + StringRef field_name_ref = lower_case_name_map[lower_case_name]; + column_index = columnIndex(field_name_ref, key_index); + } + else + column_index = columnIndex(name_ref, key_index); if (unlikely(ssize_t(column_index) < 0)) { diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h index d97aa2dad8d..8a1cef8fa9f 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h @@ -55,7 +55,13 @@ private: virtual void readRowStart(MutableColumns &) {} virtual void skipRowStart() {} - + String transformFieldNameToLowerCase(const StringRef & field_name) + { + String field_name_str = field_name.toString(); + std::transform(field_name_str.begin(), field_name_str.end(), 
field_name_str.begin(), + [](unsigned char c) { return std::tolower(c); }); + return field_name_str; + } /// Buffer for the read from the stream field name. Used when you have to copy it. /// Also, if processing of Nested data is in progress, it holds the common prefix /// of the nested column names (so that appending the field name to it produces @@ -74,7 +80,8 @@ private: /// Hash table match `field name -> position in the block`. NOTE You can use perfect hash map. Block::NameMap name_map; - + /// Hash table match `lower_case field name -> field name in the block`. + std::unordered_map lower_case_name_map; /// Cached search results for previous row (keyed as index in JSON object) - used as a hint. std::vector prev_positions; diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 0b55f633c6a..dcd5a531b05 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -269,7 +269,12 @@ convertFieldToORCLiteral(const orc::Type & orc_type, const Field & field, DataTy case orc::SHORT: case orc::INT: case orc::LONG: { - /// May throw exception + /// May throw exception. + /// + /// In particular, it'll throw if we request the column as unsigned, like this: + /// SELECT * FROM file('t.orc', ORC, 'x UInt8') WHERE x > 10 + /// We have to reject this, otherwise it would miss values > 127 (because + /// they're treated as negative by ORC). 
auto val = field.get(); return orc::Literal(val); } diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 1e36c100667..6f543a05fba 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -315,18 +315,20 @@ void ORCBlockOutputFormat::writeColumn( if (null_bytemap) orc_column.hasNulls = true; + /// ORC doesn't have unsigned types, so cast everything to signed and sign-extend to Int64 to + /// make the ORC library calculate min and max correctly. switch (type->getTypeId()) { case TypeIndex::Enum8: [[fallthrough]]; case TypeIndex::Int8: { /// Note: Explicit cast to avoid clang-tidy error: 'signed char' to 'long' conversion; consider casting to 'unsigned char' first. - writeNumbers(orc_column, column, null_bytemap, [](const Int8 & value){ return static_cast(value); }); + writeNumbers(orc_column, column, null_bytemap, [](const Int8 & value){ return Int64(Int8(value)); }); break; } case TypeIndex::UInt8: { - writeNumbers(orc_column, column, null_bytemap, [](const UInt8 & value){ return value; }); + writeNumbers(orc_column, column, null_bytemap, [](const UInt8 & value){ return Int64(Int8(value)); }); break; } case TypeIndex::Enum16: [[fallthrough]]; @@ -338,7 +340,7 @@ void ORCBlockOutputFormat::writeColumn( case TypeIndex::Date: [[fallthrough]]; case TypeIndex::UInt16: { - writeNumbers(orc_column, column, null_bytemap, [](const UInt16 & value){ return value; }); + writeNumbers(orc_column, column, null_bytemap, [](const UInt16 & value){ return Int64(Int16(value)); }); break; } case TypeIndex::Date32: [[fallthrough]]; @@ -349,12 +351,12 @@ void ORCBlockOutputFormat::writeColumn( } case TypeIndex::UInt32: { - writeNumbers(orc_column, column, null_bytemap, [](const UInt32 & value){ return value; }); + writeNumbers(orc_column, column, null_bytemap, [](const UInt32 & value){ return Int64(Int32(value)); }); break; } case 
TypeIndex::IPv4: { - writeNumbers(orc_column, column, null_bytemap, [](const IPv4 & value){ return value.toUnderType(); }); + writeNumbers(orc_column, column, null_bytemap, [](const IPv4 & value){ return Int64(Int32(value.toUnderType())); }); break; } case TypeIndex::Int64: diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index 9a15789f267..18a0db7484e 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -113,7 +113,7 @@ private: { throw Exception( ErrorCodes::PARQUET_EXCEPTION, - "Unsupported logical type: {} and physical type: {} for field =={}=={}", + "Unsupported logical type: {} and physical type: {} for field `{}`{}", col_descriptor.logical_type()->ToString(), col_descriptor.physical_type(), col_descriptor.name(), msg); } }; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 04b3a64b6cb..e837d4d5e20 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -420,6 +420,24 @@ void ParquetBlockInputFormat::initializeIfNeeded() int num_row_groups = metadata->num_row_groups(); row_group_batches.reserve(num_row_groups); + auto adative_chunk_size = [&](int row_group_idx) -> size_t + { + size_t total_size = 0; + auto row_group_meta = metadata->RowGroup(row_group_idx); + for (int column_index : column_indices) + { + total_size += row_group_meta->ColumnChunk(column_index)->total_uncompressed_size(); + } + if (!total_size || !format_settings.parquet.prefer_block_bytes) return 0; + auto average_row_bytes = floor(static_cast(total_size) / row_group_meta->num_rows()); + // avoid inf preferred_num_rows; + if (average_row_bytes < 1) return 0; + const size_t preferred_num_rows = 
static_cast(floor(format_settings.parquet.prefer_block_bytes/average_row_bytes)); + const size_t MIN_ROW_NUM = 128; + // size_t != UInt64 in darwin + return std::min(std::max(preferred_num_rows, MIN_ROW_NUM), static_cast(format_settings.parquet.max_block_size)); + }; + for (int row_group = 0; row_group < num_row_groups; ++row_group) { if (skip_row_groups.contains(row_group)) @@ -439,6 +457,8 @@ void ParquetBlockInputFormat::initializeIfNeeded() row_group_batches.back().row_groups_idxs.push_back(row_group); row_group_batches.back().total_rows += metadata->RowGroup(row_group)->num_rows(); row_group_batches.back().total_bytes_compressed += metadata->RowGroup(row_group)->total_compressed_size(); + auto rows = adative_chunk_size(row_group); + row_group_batches.back().adaptive_chunk_size = rows ? rows : format_settings.parquet.max_block_size; } } @@ -449,7 +469,7 @@ void ParquetBlockInputFormat::initializeRowGroupBatchReader(size_t row_group_bat parquet::ArrowReaderProperties arrow_properties; parquet::ReaderProperties reader_properties(ArrowMemoryPool::instance()); arrow_properties.set_use_threads(false); - arrow_properties.set_batch_size(format_settings.parquet.max_block_size); + arrow_properties.set_batch_size(row_group_batch.adaptive_chunk_size); // When reading a row group, arrow will: // 1. Look at `metadata` to get all byte ranges it'll need to read from the file (typically one diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index d6591f5c0a3..24735ee4371 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -208,6 +208,8 @@ private: size_t total_rows = 0; size_t total_bytes_compressed = 0; + size_t adaptive_chunk_size = 0; + std::vector row_groups_idxs; // These are only used by the decoding thread, so don't require locking the mutex. 
diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 2662232a048..a5d334f4f1d 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -321,6 +321,9 @@ void ParquetBlockOutputFormat::writeUsingArrow(std::vector chunks) parquet::WriterProperties::Builder builder; builder.version(getParquetVersion(format_settings)); builder.compression(getParquetCompression(format_settings.parquet.output_compression_method)); + // write page index is disable at default. + if (format_settings.parquet.write_page_index) + builder.enable_write_page_index(); parquet::ArrowWriterProperties::Builder writer_props_builder; if (format_settings.parquet.output_compliant_nested_types) diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index b1dbe68579f..ef3ef18e88d 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -116,6 +116,12 @@ struct GridSymbols const char * dash = "─"; const char * bold_bar = "┃"; const char * bar = "│"; + const char * bold_right_separator_footer = "┫"; + const char * bold_left_separator_footer = "┣"; + const char * bold_middle_separator_footer = "╋"; + const char * bold_left_bottom_corner = "┗"; + const char * bold_right_bottom_corner = "┛"; + const char * bold_bottom_separator = "┻"; }; GridSymbols utf8_grid_symbols; @@ -182,47 +188,58 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind Widths name_widths; calculateWidths(header, chunk, widths, max_widths, name_widths); - const GridSymbols & grid_symbols = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ? 
- utf8_grid_symbols : - ascii_grid_symbols; + const GridSymbols & grid_symbols + = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ? utf8_grid_symbols : ascii_grid_symbols; /// Create separators WriteBufferFromOwnString top_separator; WriteBufferFromOwnString middle_names_separator; WriteBufferFromOwnString middle_values_separator; WriteBufferFromOwnString bottom_separator; + WriteBufferFromOwnString footer_top_separator; + WriteBufferFromOwnString footer_bottom_separator; - top_separator << grid_symbols.bold_left_top_corner; - middle_names_separator << grid_symbols.bold_left_separator; + top_separator << grid_symbols.bold_left_top_corner; + middle_names_separator << grid_symbols.bold_left_separator; middle_values_separator << grid_symbols.left_separator; - bottom_separator << grid_symbols.left_bottom_corner; + bottom_separator << grid_symbols.left_bottom_corner; + footer_top_separator << grid_symbols.bold_left_separator_footer; + footer_bottom_separator << grid_symbols.bold_left_bottom_corner; for (size_t i = 0; i < num_columns; ++i) { if (i != 0) { - top_separator << grid_symbols.bold_top_separator; - middle_names_separator << grid_symbols.bold_middle_separator; + top_separator << grid_symbols.bold_top_separator; + middle_names_separator << grid_symbols.bold_middle_separator; middle_values_separator << grid_symbols.middle_separator; - bottom_separator << grid_symbols.bottom_separator; + bottom_separator << grid_symbols.bottom_separator; + footer_top_separator << grid_symbols.bold_middle_separator_footer; + footer_bottom_separator << grid_symbols.bold_bottom_separator; } for (size_t j = 0; j < max_widths[i] + 2; ++j) { - top_separator << grid_symbols.bold_dash; - middle_names_separator << grid_symbols.bold_dash; + top_separator << grid_symbols.bold_dash; + middle_names_separator << grid_symbols.bold_dash; middle_values_separator << grid_symbols.dash; - bottom_separator << grid_symbols.dash; + bottom_separator << grid_symbols.dash; + 
footer_top_separator << grid_symbols.bold_dash; + footer_bottom_separator << grid_symbols.bold_dash; } } - top_separator << grid_symbols.bold_right_top_corner << "\n"; - middle_names_separator << grid_symbols.bold_right_separator << "\n"; + top_separator << grid_symbols.bold_right_top_corner << "\n"; + middle_names_separator << grid_symbols.bold_right_separator << "\n"; middle_values_separator << grid_symbols.right_separator << "\n"; - bottom_separator << grid_symbols.right_bottom_corner << "\n"; + bottom_separator << grid_symbols.right_bottom_corner << "\n"; + footer_top_separator << grid_symbols.bold_right_separator_footer << "\n"; + footer_bottom_separator << grid_symbols.bold_right_bottom_corner << "\n"; std::string top_separator_s = top_separator.str(); std::string middle_names_separator_s = middle_names_separator.str(); std::string middle_values_separator_s = middle_values_separator.str(); std::string bottom_separator_s = bottom_separator.str(); + std::string footer_top_separator_s = footer_top_separator.str(); + std::string footer_bottom_separator_s = footer_bottom_separator.str(); if (format_settings.pretty.output_format_pretty_row_numbers) { @@ -239,43 +256,47 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind } /// Names - writeCString(grid_symbols.bold_bar, out); - writeCString(" ", out); - for (size_t i = 0; i < num_columns; ++i) + auto write_names = [&]() -> void { - if (i != 0) + writeCString(grid_symbols.bold_bar, out); + writeCString(" ", out); + for (size_t i = 0; i < num_columns; ++i) { - writeCString(" ", out); - writeCString(grid_symbols.bold_bar, out); - writeCString(" ", out); + if (i != 0) + { + writeCString(" ", out); + writeCString(grid_symbols.bold_bar, out); + writeCString(" ", out); + } + + const auto & col = header.getByPosition(i); + + if (color) + writeCString("\033[1m", out); + + if (col.type->shouldAlignRightInPrettyFormats()) + { + for (size_t k = 0; k < max_widths[i] - name_widths[i]; ++k) + 
writeChar(' ', out); + + writeString(col.name, out); + } + else + { + writeString(col.name, out); + + for (size_t k = 0; k < max_widths[i] - name_widths[i]; ++k) + writeChar(' ', out); + } + + if (color) + writeCString("\033[0m", out); } - - const auto & col = header.getByPosition(i); - - if (color) - writeCString("\033[1m", out); - - if (col.type->shouldAlignRightInPrettyFormats()) - { - for (size_t k = 0; k < max_widths[i] - name_widths[i]; ++k) - writeChar(' ', out); - - writeString(col.name, out); - } - else - { - writeString(col.name, out); - - for (size_t k = 0; k < max_widths[i] - name_widths[i]; ++k) - writeChar(' ', out); - } - - if (color) - writeCString("\033[0m", out); - } - writeCString(" ", out); - writeCString(grid_symbols.bold_bar, out); - writeCString("\n", out); + writeCString(" ", out); + writeCString(grid_symbols.bold_bar, out); + writeCString("\n", out); + }; + write_names(); if (format_settings.pretty.output_format_pretty_row_numbers) { @@ -317,9 +338,15 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind if (j != 0) writeCString(grid_symbols.bar, out); const auto & type = *header.getByPosition(j).type; - writeValueWithPadding(*columns[j], *serializations[j], i, + writeValueWithPadding( + *columns[j], + *serializations[j], + i, widths[j].empty() ? 
max_widths[j] : widths[j][i], - max_widths[j], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type)); + max_widths[j], + cut_to_width, + type.shouldAlignRightInPrettyFormats(), + isNumber(type)); } writeCString(grid_symbols.bar, out); @@ -332,8 +359,33 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind /// Write left blank writeString(String(row_number_width, ' '), out); } - writeString(bottom_separator_s, out); + /// output column names in the footer + if ((num_rows >= format_settings.pretty.output_format_pretty_display_footer_column_names_min_rows) && format_settings.pretty.output_format_pretty_display_footer_column_names) + { + writeString(footer_top_separator_s, out); + + if (format_settings.pretty.output_format_pretty_row_numbers) + { + /// Write left blank + writeString(String(row_number_width, ' '), out); + } + + /// output header names + write_names(); + + if (format_settings.pretty.output_format_pretty_row_numbers) + { + /// Write left blank + writeString(String(row_number_width, ' '), out); + } + + writeString(footer_bottom_separator_s, out); + } + else + { + writeString(bottom_separator_s, out); + } total_rows += num_rows; } diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index e1cbf69dbf0..57ec23e7e3b 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -57,7 +57,8 @@ PrettyCompactBlockOutputFormat::PrettyCompactBlockOutputFormat(WriteBuffer & out void PrettyCompactBlockOutputFormat::writeHeader( const Block & block, const Widths & max_widths, - const Widths & name_widths) + const Widths & name_widths, + const bool write_footer) { if (format_settings.pretty.output_format_pretty_row_numbers) { @@ -70,14 +71,20 @@ void PrettyCompactBlockOutputFormat::writeHeader( ascii_grid_symbols; /// Names - 
writeCString(grid_symbols.left_top_corner, out); + if (write_footer) + writeCString(grid_symbols.left_bottom_corner, out); + else + writeCString(grid_symbols.left_top_corner, out); writeCString(grid_symbols.dash, out); for (size_t i = 0; i < max_widths.size(); ++i) { if (i != 0) { writeCString(grid_symbols.dash, out); - writeCString(grid_symbols.top_separator, out); + if (write_footer) + writeCString(grid_symbols.bottom_separator, out); + else + writeCString(grid_symbols.top_separator, out); writeCString(grid_symbols.dash, out); } @@ -107,7 +114,10 @@ void PrettyCompactBlockOutputFormat::writeHeader( } } writeCString(grid_symbols.dash, out); - writeCString(grid_symbols.right_top_corner, out); + if (write_footer) + writeCString(grid_symbols.right_bottom_corner, out); + else + writeCString(grid_symbols.right_top_corner, out); writeCString("\n", out); } @@ -195,13 +205,19 @@ void PrettyCompactBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind po Widths name_widths; calculateWidths(header, chunk, widths, max_widths, name_widths); - writeHeader(header, max_widths, name_widths); + writeHeader(header, max_widths, name_widths, false); for (size_t i = 0; i < num_rows && total_rows + i < max_rows; ++i) writeRow(i, header, chunk, widths, max_widths); - - writeBottom(max_widths); + if ((num_rows >= format_settings.pretty.output_format_pretty_display_footer_column_names_min_rows) && format_settings.pretty.output_format_pretty_display_footer_column_names) + { + writeHeader(header, max_widths, name_widths, true); + } + else + { + writeBottom(max_widths); + } total_rows += num_rows; } diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h index 911fc2e950c..b0b7c2ad8f4 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.h @@ -17,7 +17,7 @@ public: String getName() const override { return 
"PrettyCompactBlockOutputFormat"; } private: - void writeHeader(const Block & block, const Widths & max_widths, const Widths & name_widths); + void writeHeader(const Block & block, const Widths & max_widths, const Widths & name_widths, bool write_footer); void writeBottom(const Widths & max_widths); void writeRow( size_t row_num, diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp index 3f224f034aa..0a594b54b12 100644 --- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp @@ -36,39 +36,46 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port if (format_settings.pretty.output_format_pretty_row_numbers) writeString(String(row_number_width, ' '), out); /// Names - for (size_t i = 0; i < num_columns; ++i) + auto write_names = [&](const bool is_footer) -> void { - if (i != 0) - writeCString(" ", out); - else - writeChar(' ', out); - - const ColumnWithTypeAndName & col = header.getByPosition(i); - - if (col.type->shouldAlignRightInPrettyFormats()) + for (size_t i = 0; i < num_columns; ++i) { - for (ssize_t k = 0; k < std::max(0z, static_cast(max_widths[i] - name_widths[i])); ++k) + if (i != 0) + writeCString(" ", out); + else writeChar(' ', out); - if (color) - writeCString("\033[1m", out); - writeString(col.name, out); - if (color) - writeCString("\033[0m", out); - } - else - { - if (color) - writeCString("\033[1m", out); - writeString(col.name, out); - if (color) - writeCString("\033[0m", out); + const ColumnWithTypeAndName & col = header.getByPosition(i); - for (ssize_t k = 0; k < std::max(0z, static_cast(max_widths[i] - name_widths[i])); ++k) - writeChar(' ', out); + if (col.type->shouldAlignRightInPrettyFormats()) + { + for (ssize_t k = 0; k < std::max(0z, static_cast(max_widths[i] - name_widths[i])); ++k) + writeChar(' ', out); + + if (color) + writeCString("\033[1m", out); 
+ writeString(col.name, out); + if (color) + writeCString("\033[0m", out); + } + else + { + if (color) + writeCString("\033[1m", out); + writeString(col.name, out); + if (color) + writeCString("\033[0m", out); + + for (ssize_t k = 0; k < std::max(0z, static_cast(max_widths[i] - name_widths[i])); ++k) + writeChar(' ', out); + } } - } - writeCString("\n\n", out); + if (!is_footer) + writeCString("\n\n", out); + else + writeCString("\n", out); + }; + write_names(false); for (size_t row = 0; row < num_rows && total_rows + row < max_rows; ++row) { @@ -95,11 +102,19 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port writeValueWithPadding( *columns[column], *serializations[column], row, cur_width, max_widths[column], cut_to_width, type.shouldAlignRightInPrettyFormats(), isNumber(type)); } - writeReadableNumberTip(chunk); writeChar('\n', out); } + /// Write blank line between last row and footer + if ((num_rows >= format_settings.pretty.output_format_pretty_display_footer_column_names_min_rows) && format_settings.pretty.output_format_pretty_display_footer_column_names) + writeCString("\n", out); + /// Write left blank + if ((num_rows >= format_settings.pretty.output_format_pretty_display_footer_column_names_min_rows) && format_settings.pretty.output_format_pretty_row_numbers && format_settings.pretty.output_format_pretty_display_footer_column_names) + writeString(String(row_number_width, ' '), out); + /// Write footer + if ((num_rows >= format_settings.pretty.output_format_pretty_display_footer_column_names_min_rows) && format_settings.pretty.output_format_pretty_display_footer_column_names) + write_names(true); total_rows += num_rows; } diff --git a/src/Processors/IProcessor.cpp b/src/Processors/IProcessor.cpp index 5ab5e5277aa..f403aca2280 100644 --- a/src/Processors/IProcessor.cpp +++ b/src/Processors/IProcessor.cpp @@ -1,21 +1,57 @@ #include #include +#include +#include +#include + namespace DB { +void IProcessor::cancel() +{ + + bool 
already_cancelled = is_cancelled.exchange(true, std::memory_order_acq_rel); + if (already_cancelled) + return; + + onCancel(); +} + +String IProcessor::debug() const +{ + WriteBufferFromOwnString buf; + writeString(getName(), buf); + buf.write('\n'); + + writeString("inputs (hasData, isFinished):\n", buf); + for (const auto & port : inputs) + { + buf.write('\t'); + writeBoolText(port.hasData(), buf); + buf.write(' '); + writeBoolText(port.isFinished(), buf); + buf.write('\n'); + } + + writeString("outputs (hasData, isNeeded):\n", buf); + for (const auto & port : outputs) + { + buf.write('\t'); + writeBoolText(port.hasData(), buf); + buf.write(' '); + writeBoolText(port.isNeeded(), buf); + buf.write('\n'); + } + + buf.finalize(); + return buf.str(); +} + void IProcessor::dump() const { - std::cerr << getName() << "\n"; - - std::cerr << "inputs:\n"; - for (const auto & port : inputs) - std::cerr << "\t" << port.hasData() << " " << port.isFinished() << "\n"; - - std::cerr << "outputs:\n"; - for (const auto & port : outputs) - std::cerr << "\t" << port.hasData() << " " << port.isNeeded() << "\n"; + std::cerr << debug(); } @@ -39,4 +75,3 @@ std::string IProcessor::statusToName(Status status) } } - diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 63f32d8deb7..6f779e7a8d4 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -238,12 +238,7 @@ public: /// In case if query was cancelled executor will wait till all processors finish their jobs. /// Generally, there is no reason to check this flag. However, it may be reasonable for long operations (e.g. i/o). bool isCancelled() const { return is_cancelled.load(std::memory_order_acquire); } - void cancel() - { - bool already_cancelled = is_cancelled.exchange(true, std::memory_order_acq_rel); - if (!already_cancelled) - onCancel(); - } + void cancel(); /// Additional method which is called in case if ports were updated while work() method. 
/// May be used to stop execution in rare cases. @@ -286,6 +281,7 @@ public: const auto & getOutputs() const { return outputs; } /// Debug output. + String debug() const; void dump() const; /// Used to print pipeline. diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index 56b31b2c8ba..72934665b5c 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -50,6 +50,8 @@ FilterStep::FilterStep( , filter_column_name(std::move(filter_column_name_)) , remove_filter_column(remove_filter_column_) { + actions_dag = actions_dag->clone(); + actions_dag->removeAliasesForFilter(filter_column_name); } void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index b33a373a970..b1ab5561958 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -107,7 +107,7 @@ struct Frame using Stack = std::vector; /// Second pass optimizations -void optimizePrimaryKeyCondition(const Stack & stack); +void optimizePrimaryKeyConditionAndLimit(const Stack & stack); void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes); void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes); void optimizeAggregationInOrder(QueryPlan::Node & node, QueryPlan::Nodes &); diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 8ca240b3e8b..263598bdca7 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -608,6 +609,14 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes return 3; } + if 
(auto * read_from_merge = typeid_cast(child.get())) + { + FilterDAGInfo info{filter->getExpression(), filter->getFilterColumnName(), filter->removesFilterColumn()}; + read_from_merge->addFilter(std::move(info)); + std::swap(*parent_node, *child_node); + return 1; + } + return 0; } diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp index a5cb5972bd8..6ace1b3b5ce 100644 --- a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp +++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp @@ -2,10 +2,25 @@ #include #include #include +#include +#include namespace DB::QueryPlanOptimizations { +static void removeFromOutputs(ActionsDAG & dag, const ActionsDAG::Node & node) +{ + auto & outputs = dag.getOutputs(); + for (size_t i = 0; i < outputs.size(); ++i) + { + if (&node == outputs[i]) + { + outputs.erase(outputs.begin() + i); + return; + } + } +} + size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) { if (parent_node->children.size() != 1) @@ -19,6 +34,7 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) auto * parent_expr = typeid_cast(parent.get()); auto * parent_filter = typeid_cast(parent.get()); auto * child_expr = typeid_cast(child.get()); + auto * child_filter = typeid_cast(child.get()); if (parent_expr && child_expr) { @@ -60,6 +76,42 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) parent_node->children.swap(child_node->children); return 1; } + else if (parent_filter && child_filter) + { + const auto & child_actions = child_filter->getExpression(); + const auto & parent_actions = parent_filter->getExpression(); + + if (child_actions->hasArrayJoin()) + return 0; + + auto actions = child_actions->clone(); + const auto & child_filter_node = actions->findInOutputs(child_filter->getFilterColumnName()); + if (child_filter->removesFilterColumn()) + removeFromOutputs(*actions, 
child_filter_node); + + actions->mergeInplace(std::move(*parent_actions->clone())); + + const auto & parent_filter_node = actions->findInOutputs(parent_filter->getFilterColumnName()); + if (parent_filter->removesFilterColumn()) + removeFromOutputs(*actions, parent_filter_node); + + FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); + const auto & condition = actions->addFunction(func_builder_and, {&child_filter_node, &parent_filter_node}, {}); + auto & outputs = actions->getOutputs(); + outputs.insert(outputs.begin(), &condition); + + actions->removeUnusedActions(false); + + auto filter = std::make_unique(child_filter->getInputStreams().front(), + actions, + condition.result_name, + true); + filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_filter->getStepDescription() + ")"); + + parent_node->step = std::move(filter); + parent_node->children.swap(child_node->children); + return 1; + } return 0; } diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index fbd9b451ddc..1badd315200 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -4,10 +4,10 @@ #include #include #include +#include #include #include #include - namespace DB { @@ -30,7 +30,7 @@ static void removeFromOutput(ActionsDAG & dag, const std::string name) void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) { - if (stack.size() < 3) + if (stack.size() < 2) return; auto & frame = stack.back(); @@ -45,6 +45,9 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) if (!source_step_with_filter) return; + if (typeid_cast(frame.node->step.get())) + return; + const auto & storage_snapshot = source_step_with_filter->getStorageSnapshot(); const auto & storage = storage_snapshot->storage; if (!storage.canMoveConditionsToPrewhere()) @@ -83,7 +86,7 @@ void 
optimizePrewhere(Stack & stack, QueryPlan::Nodes &) MergeTreeWhereOptimizer where_optimizer{ std::move(column_compressed_sizes), storage_metadata, - storage.getConditionEstimatorByPredicate(storage_snapshot, source_step_with_filter->getFilterActionsDAG(), context), + storage.getConditionSelectivityEstimatorByPredicate(storage_snapshot, source_step_with_filter->getFilterActionsDAG(), context), queried_columns, storage.supportedPrewhereColumns(), getLogger("QueryPlanOptimizePrewhere")}; @@ -118,7 +121,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) outputs.resize(size); } - auto split_result = filter_step->getExpression()->split(optimize_result.prewhere_nodes, true); + auto split_result = filter_step->getExpression()->split(optimize_result.prewhere_nodes, true, true); /// This is the leak of abstraction. /// Splited actions may have inputs which are needed only for PREWHERE. diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp similarity index 68% rename from src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp rename to src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp index dbcaf5f00a7..da4e104d18b 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyCondition.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrimaryKeyConditionAndLimit.cpp @@ -1,13 +1,13 @@ #include #include #include -#include +#include #include namespace DB::QueryPlanOptimizations { -void optimizePrimaryKeyCondition(const Stack & stack) +void optimizePrimaryKeyConditionAndLimit(const Stack & stack) { const auto & frame = stack.back(); @@ -26,15 +26,25 @@ void optimizePrimaryKeyCondition(const Stack & stack) for (auto iter = stack.rbegin() + 1; iter != stack.rend(); ++iter) { if (auto * filter_step = typeid_cast(iter->node->step.get())) + { source_step_with_filter->addFilter(filter_step->getExpression(), 
filter_step->getFilterColumnName()); - - /// Note: actually, plan optimizations merge Filter and Expression steps. - /// Ideally, chain should look like (Expression -> ...) -> (Filter -> ...) -> ReadFromStorage, - /// So this is likely not needed. - else if (typeid_cast(iter->node->step.get())) - continue; - else + } + else if (auto * limit_step = typeid_cast(iter->node->step.get())) + { + source_step_with_filter->setLimit(limit_step->getLimitForSorting()); break; + } + else if (typeid_cast(iter->node->step.get())) + { + /// Note: actually, plan optimizations merge Filter and Expression steps. + /// Ideally, chain should look like (Expression -> ...) -> (Filter -> ...) -> ReadFromStorage, + /// So this is likely not needed. + continue; + } + else + { + break; + } } source_step_with_filter->applyFilters(); diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index c175cd516ac..537555afa2a 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -176,8 +176,6 @@ static void appendExpression(ActionsDAGPtr & dag, const ActionsDAGPtr & expressi dag->mergeInplace(std::move(*expression->clone())); else dag = expression->clone(); - - dag->projectInput(false); } /// This function builds a common DAG which is a merge of DAGs from Filter and Expression steps chain. 
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index df9e095af30..25895788e2e 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -115,10 +115,10 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s while (!stack.empty()) { - optimizePrimaryKeyCondition(stack); + optimizePrimaryKeyConditionAndLimit(stack); /// NOTE: optimizePrewhere can modify the stack. - /// Prewhere optimization relies on PK optimization (getConditionEstimatorByPredicate) + /// Prewhere optimization relies on PK optimization (getConditionSelectivityEstimatorByPredicate) if (optimization_settings.optimize_prewhere) optimizePrewhere(stack, nodes); diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 4017670ad14..70327bc95b4 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -77,7 +77,7 @@ static AggregateProjectionInfo getAggregatingProjectionInfo( AggregateProjectionInfo info; info.context = interpreter.getContext(); - info.before_aggregation = analysis_result.before_aggregation; + info.before_aggregation = analysis_result.before_aggregation->dag.clone(); info.keys = query_analyzer->aggregationKeys().getNames(); info.aggregates = query_analyzer->aggregates(); @@ -421,6 +421,9 @@ struct AggregateProjectionCandidates /// This flag means that DAG for projection candidate should be used in FilterStep. bool has_filter = false; + + /// If not empty, try to find exact ranges from parts to speed up trivial count queries. 
+ String only_count_column; }; AggregateProjectionCandidates getAggregateProjectionCandidates( @@ -502,6 +505,12 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( candidates.minmax_projection.emplace(std::move(minmax)); } } + else + { + /// Trivial count optimization only applies after @can_use_minmax_projection. + if (keys.empty() && aggregates.size() == 1 && typeid_cast(aggregates[0].function.get())) + candidates.only_count_column = aggregates[0].column_name; + } } if (!candidates.minmax_projection) @@ -584,13 +593,21 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu ContextPtr context = reading->getContext(); MergeTreeDataSelectExecutor reader(reading->getMergeTreeData()); AggregateProjectionCandidate * best_candidate = nullptr; + + /// Stores row count from exact ranges of parts. + size_t exact_count = 0; + if (candidates.minmax_projection) { best_candidate = &candidates.minmax_projection->candidate; } - else if (!candidates.real.empty()) + else if (!candidates.real.empty() || !candidates.only_count_column.empty()) { - auto ordinary_reading_select_result = reading->selectRangesToRead(); + auto ordinary_reading_select_result = reading->getAnalyzedResult(); + bool find_exact_ranges = !candidates.only_count_column.empty(); + if (!ordinary_reading_select_result || (!ordinary_reading_select_result->has_exact_ranges && find_exact_ranges)) + ordinary_reading_select_result = reading->selectRangesToRead(find_exact_ranges); + size_t ordinary_reading_marks = ordinary_reading_select_result->selected_marks; /// Nothing to read. Ignore projections. 
@@ -600,7 +617,49 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu return {}; } - const auto & parts_with_ranges = ordinary_reading_select_result->parts_with_ranges; + auto & parts_with_ranges = ordinary_reading_select_result->parts_with_ranges; + + if (!candidates.only_count_column.empty()) + { + for (auto & part_with_ranges : parts_with_ranges) + { + MarkRanges new_ranges; + auto & ranges = part_with_ranges.ranges; + const auto & exact_ranges = part_with_ranges.exact_ranges; + if (exact_ranges.empty()) + continue; + + size_t i = 0; + size_t len = exact_ranges.size(); + for (auto & range : ranges) + { + while (i < len && exact_ranges[i].begin < range.end) + { + chassert(exact_ranges[i].begin >= range.begin); + chassert(exact_ranges[i].end <= range.end); + + /// Found some marks which are not exact + if (range.begin < exact_ranges[i].begin) + new_ranges.emplace_back(range.begin, exact_ranges[i].begin); + + range.begin = exact_ranges[i].end; + ordinary_reading_marks -= exact_ranges[i].end - exact_ranges[i].begin; + exact_count += part_with_ranges.data_part->index_granularity.getRowsCountInRange(exact_ranges[i]); + ++i; + } + + /// Current range still contains some marks which are not exact + if (range.begin < range.end) + new_ranges.emplace_back(range); + } + chassert(i == len); + part_with_ranges.ranges = std::move(new_ranges); + } + + std::erase_if(parts_with_ranges, [&](const auto & part_with_ranges) { return part_with_ranges.ranges.empty(); }); + if (parts_with_ranges.empty()) + chassert(ordinary_reading_marks == 0); + } /// Selecting best candidate. 
for (auto & candidate : candidates.real) @@ -630,8 +689,20 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu if (!best_candidate) { - reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); - return {}; + if (exact_count > 0) + { + if (ordinary_reading_marks > 0) + { + ordinary_reading_select_result->selected_marks = ordinary_reading_marks; + ordinary_reading_select_result->selected_rows -= exact_count; + reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); + } + } + else + { + reading->setAnalyzedResult(std::move(ordinary_reading_select_result)); + return {}; + } } } else @@ -639,10 +710,11 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu return {}; } - chassert(best_candidate != nullptr); - QueryPlanStepPtr projection_reading; bool has_ordinary_parts; + String selected_projection_name; + if (best_candidate) + selected_projection_name = best_candidate->projection->name; /// Add reading from projection step. 
if (candidates.minmax_projection) @@ -654,6 +726,32 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu projection_reading = std::make_unique(std::move(pipe)); has_ordinary_parts = false; } + else if (best_candidate == nullptr) + { + chassert(exact_count > 0); + + auto agg_count = std::make_shared(DataTypes{}); + + std::vector state(agg_count->sizeOfData()); + AggregateDataPtr place = state.data(); + agg_count->create(place); + SCOPE_EXIT_MEMORY_SAFE(agg_count->destroy(place)); + agg_count->set(place, exact_count); + + auto column = ColumnAggregateFunction::create(agg_count); + column->insertFrom(place); + + Block block_with_count{ + {std::move(column), + std::make_shared(agg_count, DataTypes{}, Array{}), + candidates.only_count_column}}; + + Pipe pipe(std::make_shared(std::move(block_with_count))); + projection_reading = std::make_unique(std::move(pipe)); + + selected_projection_name = "Optimized trivial count"; + has_ordinary_parts = reading->getAnalyzedResult() != nullptr; + } else { auto storage_snapshot = reading->getStorageSnapshot(); @@ -694,46 +792,54 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu context->getQueryContext()->addQueryAccessInfo(Context::QualifiedProjectionName { .storage_id = reading->getMergeTreeData().getStorageID(), - .projection_name = best_candidate->projection->name, + .projection_name = selected_projection_name, }); } // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection reading header {}", // projection_reading->getOutputStream().header.dumpStructure()); - projection_reading->setStepDescription(best_candidate->projection->name); - + projection_reading->setStepDescription(selected_projection_name); auto & projection_reading_node = nodes.emplace_back(QueryPlan::Node{.step = std::move(projection_reading)}); - auto & expr_or_filter_node = nodes.emplace_back(); - if (candidates.has_filter) + /// Root node of optimized child plan using @projection_name + QueryPlan::Node * 
aggregate_projection_node = nullptr; + + if (best_candidate) { - expr_or_filter_node.step = std::make_unique( - projection_reading_node.step->getOutputStream(), - best_candidate->dag, - best_candidate->dag->getOutputs().front()->result_name, - true); - } - else - expr_or_filter_node.step = std::make_unique( - projection_reading_node.step->getOutputStream(), - best_candidate->dag); + aggregate_projection_node = &nodes.emplace_back(); + if (candidates.has_filter) + { + aggregate_projection_node->step = std::make_unique( + projection_reading_node.step->getOutputStream(), + best_candidate->dag, + best_candidate->dag->getOutputs().front()->result_name, + true); + } + else + aggregate_projection_node->step + = std::make_unique(projection_reading_node.step->getOutputStream(), best_candidate->dag); - expr_or_filter_node.children.push_back(&projection_reading_node); + aggregate_projection_node->children.push_back(&projection_reading_node); + } + else /// trivial count optimization + { + aggregate_projection_node = &projection_reading_node; + } if (!has_ordinary_parts) { /// All parts are taken from projection - aggregating->requestOnlyMergeForAggregateProjection(expr_or_filter_node.step->getOutputStream()); - node.children.front() = &expr_or_filter_node; + aggregating->requestOnlyMergeForAggregateProjection(aggregate_projection_node->step->getOutputStream()); + node.children.front() = aggregate_projection_node; } else { - node.step = aggregating->convertToAggregatingProjection(expr_or_filter_node.step->getOutputStream()); - node.children.push_back(&expr_or_filter_node); + node.step = aggregating->convertToAggregatingProjection(aggregate_projection_node->step->getOutputStream()); + node.children.push_back(aggregate_projection_node); } - return best_candidate->projection->name; + return selected_projection_name; } } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index 
728aaaa6fc4..0af3869ccf1 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -139,7 +139,9 @@ std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod const auto & query_info = reading->getQueryInfo(); MergeTreeDataSelectExecutor reader(reading->getMergeTreeData()); - auto ordinary_reading_select_result = reading->selectRangesToRead(); + auto ordinary_reading_select_result = reading->getAnalyzedResult(); + if (!ordinary_reading_select_result) + ordinary_reading_select_result = reading->selectRangesToRead(); size_t ordinary_reading_marks = ordinary_reading_select_result->selected_marks; /// Nothing to read. Ignore projections. diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index 3009460a468..af1578d6af8 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -25,8 +25,7 @@ namespace QueryPlanOptimizations bool canUseProjectionForReadingStep(ReadFromMergeTree * reading) { - /// Probably some projection already was applied. 
- if (reading->hasAnalyzedResult()) + if (reading->getAnalyzedResult() && reading->getAnalyzedResult()->readFromProjection()) return false; if (reading->isQueryWithFinal()) diff --git a/src/Processors/QueryPlan/ReadFromLoopStep.cpp b/src/Processors/QueryPlan/ReadFromLoopStep.cpp index 10436490a2a..2e5fa3ec9f7 100644 --- a/src/Processors/QueryPlan/ReadFromLoopStep.cpp +++ b/src/Processors/QueryPlan/ReadFromLoopStep.cpp @@ -1,14 +1,15 @@ -#include -#include -#include -#include -#include +#include +#include +#include #include #include -#include -#include +#include +#include #include -#include +#include +#include +#include +#include namespace DB { @@ -111,6 +112,13 @@ namespace DB std::unique_ptr executor; }; + static ContextPtr disableParallelReplicas(ContextPtr context) + { + auto modified_context = Context::createCopy(context); + modified_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + return modified_context; + } + ReadFromLoopStep::ReadFromLoopStep( const Names & column_names_, const SelectQueryInfo & query_info_, @@ -125,7 +133,7 @@ namespace DB column_names_, query_info_, storage_snapshot_, - context_) + disableParallelReplicas(context_)) , column_names(column_names_) , processed_stage(processed_stage_) , inner_storage(std::move(inner_storage_)) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 3997f91b5e8..dc6d96a721e 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -120,6 +120,7 @@ namespace ProfileEvents extern const Event SelectedParts; extern const Event SelectedRanges; extern const Event SelectedMarks; + extern const Event SelectQueriesWithPrimaryKeyUsage; } namespace DB @@ -249,9 +250,9 @@ void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, c { /// Fail fast if estimated number of rows to read exceeds the limit size_t total_rows_estimate = selected_rows; - if 
(query_info_.limit > 0 && total_rows_estimate > query_info_.limit) + if (query_info_.trivial_limit > 0 && total_rows_estimate > query_info_.trivial_limit) { - total_rows_estimate = query_info_.limit; + total_rows_estimate = query_info_.trivial_limit; } limits.check(total_rows_estimate, 0, "rows (controlled by 'max_rows_to_read' setting)", ErrorCodes::TOO_MANY_ROWS); leaf_limits.check( @@ -382,7 +383,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); pipes.emplace_back(std::move(source)); } @@ -397,8 +398,8 @@ Pipe ReadFromMergeTree::readFromPool( { size_t total_rows = parts_with_range.getRowsCountAllParts(); - if (query_info.limit > 0 && query_info.limit < total_rows) - total_rows = query_info.limit; + if (query_info.trivial_limit > 0 && query_info.trivial_limit < total_rows) + total_rows = query_info.trivial_limit; const auto & settings = context->getSettingsRef(); @@ -435,7 +436,7 @@ Pipe ReadFromMergeTree::readFromPool( * Because time spend during filling per thread tasks can be greater than whole query * execution for big tables with small limit. 
*/ - bool use_prefetched_read_pool = query_info.limit == 0 && (allow_prefetched_remote || allow_prefetched_local); + bool use_prefetched_read_pool = query_info.trivial_limit == 0 && (allow_prefetched_remote || allow_prefetched_local); if (use_prefetched_read_pool) { @@ -481,7 +482,7 @@ Pipe ReadFromMergeTree::readFromPool( pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); - auto source = std::make_shared(std::move(processor)); + auto source = std::make_shared(std::move(processor), data.getLogName()); if (i == 0) source->addTotalRowsApprox(total_rows); @@ -500,11 +501,11 @@ Pipe ReadFromMergeTree::readInOrder( Names required_columns, PoolSettings pool_settings, ReadType read_type, - UInt64 limit) + UInt64 read_limit) { /// For reading in order it makes sense to read only /// one range per task to reduce number of read rows. - bool has_limit_below_one_block = read_type != ReadType::Default && limit && limit < block_size.max_block_size_rows; + bool has_limit_below_one_block = read_type != ReadType::Default && read_limit && read_limit < block_size.max_block_size_rows; MergeTreeReadPoolPtr pool; if (is_parallel_reading_from_replicas) @@ -562,9 +563,8 @@ Pipe ReadFromMergeTree::readInOrder( /// Actually it means that parallel reading from replicas enabled /// and we have to collaborate with initiator. /// In this case we won't set approximate rows, because it will be accounted multiple times. - /// Also do not count amount of read rows if we read in order of sorting key, - /// because we don't know actual amount of read rows in case when limit is set. - bool set_rows_approx = !is_parallel_reading_from_replicas && !reader_settings.read_in_order; + const auto in_order_limit = query_info.input_order_info ? 
query_info.input_order_info->limit : 0; + const bool set_total_rows_approx = !is_parallel_reading_from_replicas; Pipes pipes; for (size_t i = 0; i < parts_with_ranges.size(); ++i) @@ -572,8 +572,10 @@ Pipe ReadFromMergeTree::readInOrder( const auto & part_with_ranges = parts_with_ranges[i]; UInt64 total_rows = part_with_ranges.getRowsCount(); - if (query_info.limit > 0 && query_info.limit < total_rows) - total_rows = query_info.limit; + if (query_info.trivial_limit > 0 && query_info.trivial_limit < total_rows) + total_rows = query_info.trivial_limit; + else if (in_order_limit > 0 && in_order_limit < total_rows) + total_rows = in_order_limit; LOG_TRACE(log, "Reading {} ranges in{}order from part {}, approx. {} rows starting from {}", part_with_ranges.ranges.size(), @@ -593,8 +595,8 @@ Pipe ReadFromMergeTree::readInOrder( processor->addPartLevelToChunk(isQueryWithFinal()); - auto source = std::make_shared(std::move(processor)); - if (set_rows_approx) + auto source = std::make_shared(std::move(processor), data.getLogName()); + if (set_total_rows_approx) source->addTotalRowsApprox(total_rows); pipes.emplace_back(std::move(source)); @@ -849,10 +851,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreams(RangesInDataParts && parts_ static ActionsDAGPtr createProjection(const Block & header) { - auto projection = std::make_shared(header.getNamesAndTypesList()); - projection->removeUnusedActions(header.getNames()); - projection->projectInput(); - return projection; + return std::make_shared(header.getNamesAndTypesList()); } Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( @@ -1092,8 +1091,7 @@ static void addMergingFinal( MergeTreeData::MergingParams merging_params, Names partition_key_columns, size_t max_block_size_rows, - bool enable_vertical_final, - bool can_merge_final_indices_to_next_step_filter) + bool enable_vertical_final) { const auto & header = pipe.getHeader(); size_t num_outputs = pipe.numOutputPorts(); @@ -1135,7 +1133,7 @@ static void 
addMergingFinal( }; pipe.addTransform(get_merging_processor()); - if (enable_vertical_final && !can_merge_final_indices_to_next_step_filter) + if (enable_vertical_final) pipe.addSimpleTransform([](const Block & header_) { return std::make_shared(header_); }); } @@ -1233,7 +1231,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( bool no_merging_final = do_not_merge_across_partitions_select_final && std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && parts_to_merge_ranges[range_index]->data_part->info.level > 0 && - data.merging_params.is_deleted_column.empty(); + data.merging_params.is_deleted_column.empty() && !reader_settings.read_in_order; if (no_merging_final) { @@ -1268,7 +1266,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// Parts of non-zero level still may contain duplicate PK values to merge on FINAL if there's is_deleted column, /// so we have to process all ranges. It would be more optimal to remove this flag and add an extra filtering step. bool split_parts_ranges_into_intersecting_and_non_intersecting_final = settings.split_parts_ranges_into_intersecting_and_non_intersecting_final && - data.merging_params.is_deleted_column.empty(); + data.merging_params.is_deleted_column.empty() && !reader_settings.read_in_order; SplitPartsWithRangesByPrimaryKeyResult split_ranges_result = splitPartsWithRangesByPrimaryKey( metadata_for_reading->getPrimaryKey(), @@ -1323,8 +1321,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( data.merging_params, partition_key_columns, block_size.max_block_size_rows, - enable_vertical_final, - query_info.has_filters_and_no_array_join_before_filter); + enable_vertical_final); merging_pipes.emplace_back(Pipe::unitePipes(std::move(pipes))); } @@ -1358,9 +1355,9 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( return merging_pipes.empty() ? 
Pipe::unitePipes(std::move(no_merging_pipes)) : Pipe::unitePipes(std::move(merging_pipes)); } -ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead() const +ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(bool find_exact_ranges) const { - return selectRangesToRead(prepared_parts, alter_conversions_for_parts, false /* find_exact_ranges */); + return selectRangesToRead(prepared_parts, alter_conversions_for_parts, find_exact_ranges); } ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( @@ -1574,11 +1571,17 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( if (indexes->part_values && indexes->part_values->empty()) return std::make_shared(std::move(result)); - if (settings.force_primary_key && indexes->key_condition.alwaysUnknownOrTrue()) + if (indexes->key_condition.alwaysUnknownOrTrue()) { - throw Exception(ErrorCodes::INDEX_NOT_USED, - "Primary key ({}) is not used and setting 'force_primary_key' is set", - fmt::join(primary_key_column_names, ", ")); + if (settings.force_primary_key) + { + throw Exception(ErrorCodes::INDEX_NOT_USED, + "Primary key ({}) is not used and setting 'force_primary_key' is set", + fmt::join(primary_key_column_names, ", ")); + } + } else + { + ProfileEvents::increment(ProfileEvents::SelectQueriesWithPrimaryKeyUsage); } LOG_DEBUG(log, "Key condition: {}", indexes->key_condition.toString()); @@ -1664,6 +1667,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( result.selected_marks_pk = sum_marks_pk; result.total_marks_pk = total_marks_pk; result.selected_rows = sum_rows; + result.has_exact_ranges = result.selected_parts == 0 || find_exact_ranges; if (query_info_.input_order_info) result.read_type = (query_info_.input_order_info->direction > 0) @@ -1673,7 +1677,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( return std::make_shared(std::move(result)); } -bool 
ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, size_t limit) +bool ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, size_t read_limit) { /// if dirction is not set, use current one if (!direction) @@ -1684,7 +1688,7 @@ bool ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, if (direction != 1 && query_info.isFinal()) return false; - query_info.input_order_info = std::make_shared(SortDescription{}, prefix_size, direction, limit); + query_info.input_order_info = std::make_shared(SortDescription{}, prefix_size, direction, read_limit); reader_settings.read_in_order = true; /// In case or read-in-order, don't create too many reading streams. @@ -1811,8 +1815,10 @@ bool ReadFromMergeTree::requestOutputEachPartitionThroughSeparatePort() ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const { - auto result_ptr = analyzed_result_ptr ? analyzed_result_ptr : selectRangesToRead(); - return *result_ptr; + if (!analyzed_result_ptr) + analyzed_result_ptr = selectRangesToRead(); + + return *analyzed_result_ptr; } bool ReadFromMergeTree::isQueryWithSampling() const diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 243ec737456..caa8aa2e1bd 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -100,7 +100,9 @@ public: UInt64 selected_marks_pk = 0; UInt64 total_marks_pk = 0; UInt64 selected_rows = 0; + bool has_exact_ranges = false; + bool readFromProjection() const { return !parts_with_ranges.empty() && parts_with_ranges.front().data_part->isProjectionPart(); } void checkLimits(const Settings & settings, const SelectQueryInfo & query_info_) const; }; @@ -167,7 +169,7 @@ public: AnalysisResultPtr selectRangesToRead( MergeTreeData::DataPartsVector parts, std::vector alter_conversions, bool find_exact_ranges = false) const; - AnalysisResultPtr selectRangesToRead() 
const; + AnalysisResultPtr selectRangesToRead(bool find_exact_ranges = false) const; StorageMetadataPtr getStorageMetadata() const { return metadata_for_reading; } @@ -182,7 +184,7 @@ public: bool requestOutputEachPartitionThroughSeparatePort(); bool willOutputEachPartitionThroughSeparatePort() const { return output_each_partition_through_separate_port; } - bool hasAnalyzedResult() const { return analyzed_result_ptr != nullptr; } + AnalysisResultPtr getAnalyzedResult() const { return analyzed_result_ptr; } void setAnalyzedResult(AnalysisResultPtr analyzed_result_ptr_) { analyzed_result_ptr = std::move(analyzed_result_ptr_); } const MergeTreeData::DataPartsVector & getParts() const { return prepared_parts; } @@ -261,7 +263,7 @@ private: ReadFromMergeTree::AnalysisResult getAnalysisResult() const; - AnalysisResultPtr analyzed_result_ptr; + mutable AnalysisResultPtr analyzed_result_ptr; VirtualFields shared_virtual_fields; bool is_parallel_reading_from_replicas; diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 11371578c79..eb974259c5e 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -393,7 +393,7 @@ ReadFromSystemNumbersStep::ReadFromSystemNumbersStep( , num_streams{num_streams_} , limit_length_and_offset(InterpreterSelectQuery::getLimitLengthAndOffset(query_info.query->as(), context)) , should_pushdown_limit(shouldPushdownLimit(query_info, limit_length_and_offset.first)) - , limit(query_info.limit) + , query_info_limit(query_info.trivial_limit) , storage_limits(query_info.storage_limits) { storage_snapshot->check(column_names); @@ -563,7 +563,8 @@ Pipe ReadFromSystemNumbersStep::makePipe() { auto rows_appr = (*numbers_storage.limit - 1) / numbers_storage.step + 1; - if (limit > 0 && limit < rows_appr) - rows_appr = limit; + /// Clamp the approximate row count by the trivial LIMIT stored in query_info_limit (the renamed former `limit` member). + if (query_info_limit > 0 && query_info_limit < rows_appr) + rows_appr = query_info_limit; source->addTotalRowsApprox(rows_appr); } diff --git
a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h index bc84e31be62..e33d67d7150 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h @@ -41,7 +41,7 @@ private: size_t num_streams; std::pair limit_length_and_offset; bool should_pushdown_limit; - UInt64 limit; + UInt64 query_info_limit; std::shared_ptr storage_limits; }; diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.h b/src/Processors/QueryPlan/SourceStepWithFilter.h index 0971b99d828..ca4ea4f3704 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.h +++ b/src/Processors/QueryPlan/SourceStepWithFilter.h @@ -8,8 +8,9 @@ namespace DB { -/** Source step that can use filters for more efficient pipeline initialization. +/** Source step that can use filters and limit for more efficient pipeline initialization. * Filters must be added before pipeline initialization. + * Limit must be set before pipeline initialization. */ class SourceStepWithFilter : public ISourceStep { @@ -49,9 +50,9 @@ public: filter_dags.push_back(std::move(filter_dag)); } - void addFilterFromParentStep(const ActionsDAG::Node * filter_node) + void setLimit(size_t limit_value) { - filter_nodes.nodes.push_back(filter_node); + limit = limit_value; } /// Apply filters that can optimize reading from storage. 
@@ -77,6 +78,7 @@ protected: PrewhereInfoPtr prewhere_info; StorageSnapshotPtr storage_snapshot; ContextPtr context; + std::optional limit; ActionsDAGPtr filter_actions_dag; diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index 4b828d6699c..37a84d9fe96 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -120,7 +120,7 @@ Chunk PostgreSQLSource::generate() MutableColumns columns = description.sample_block.cloneEmptyColumns(); size_t num_rows = 0; - while (true) + while (!isCancelled()) { const std::vector * row{stream->read_row()}; @@ -191,6 +191,12 @@ PostgreSQLSource::~PostgreSQLSource() { try { + if (stream) + { + tx->conn().cancel_query(); + stream->close(); + } + stream.reset(); tx.reset(); } diff --git a/src/Processors/Sources/RecursiveCTESource.cpp b/src/Processors/Sources/RecursiveCTESource.cpp index 93503b45aaf..221198c622a 100644 --- a/src/Processors/Sources/RecursiveCTESource.cpp +++ b/src/Processors/Sources/RecursiveCTESource.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Processors/Transforms/AddingDefaultsTransform.cpp b/src/Processors/Transforms/AddingDefaultsTransform.cpp index e6c2bcec2c8..7945b3999c1 100644 --- a/src/Processors/Transforms/AddingDefaultsTransform.cpp +++ b/src/Processors/Transforms/AddingDefaultsTransform.cpp @@ -178,7 +178,7 @@ void AddingDefaultsTransform::transform(Chunk & chunk) auto dag = evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false); if (dag) { - auto actions = std::make_shared(std::move(dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); + auto actions = std::make_shared(std::move(dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes), true); actions->execute(evaluate_block); } diff --git a/src/Processors/Transforms/AggregatingTransform.cpp 
b/src/Processors/Transforms/AggregatingTransform.cpp index b48d435720a..65f0612d738 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include namespace ProfileEvents @@ -783,7 +783,7 @@ void AggregatingTransform::initGenerate() { /// Just a reasonable constant, matches default value for the setting `preferred_block_size_bytes` static constexpr size_t oneMB = 1024 * 1024; - return std::make_shared(header, params->params.max_block_size, oneMB); + return std::make_shared(header, params->params.max_block_size, oneMB); }); } /// AggregatingTransform::expandPipeline expects single output port. diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h new file mode 100644 index 00000000000..965a084bb13 --- /dev/null +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -0,0 +1,63 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class ApplySquashingTransform : public ExceptionKeepingTransform +{ +public: + explicit ApplySquashingTransform(const Block & header, const size_t min_block_size_rows, const size_t min_block_size_bytes) + : ExceptionKeepingTransform(header, header, false) + , squashing(header, min_block_size_rows, min_block_size_bytes) + { + } + + String getName() const override { return "ApplySquashingTransform"; } + + void work() override + { + if (stage == Stage::Exception) + { + data.chunk.clear(); + ready_input = false; + return; + } + + ExceptionKeepingTransform::work(); + if (finish_chunk) + { + data.chunk = std::move(finish_chunk); + ready_output = true; + } + } + +protected: + void onConsume(Chunk chunk) override + { + if (auto res_chunk = DB::Squashing::squash(std::move(chunk))) + cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows()); + } + + GenerateResult onGenerate() override + { + GenerateResult res; + res.chunk = std::move(cur_chunk); 
+ res.is_done = true; + return res; + } + void onFinish() override + { + auto chunk = DB::Squashing::squash({}); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); + } + +private: + Squashing squashing; + Chunk cur_chunk; + Chunk finish_chunk; +}; + +} diff --git a/src/Processors/Transforms/FilterTransform.cpp b/src/Processors/Transforms/FilterTransform.cpp index e8e7f99ce53..cd87019a8e0 100644 --- a/src/Processors/Transforms/FilterTransform.cpp +++ b/src/Processors/Transforms/FilterTransform.cpp @@ -14,7 +14,6 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; - extern const int LOGICAL_ERROR; } static void replaceFilterToConstant(Block & block, const String & filter_column_name) @@ -37,147 +36,6 @@ static void replaceFilterToConstant(Block & block, const String & filter_column_ } } -static std::shared_ptr getSelectByFinalIndices(Chunk & chunk) -{ - if (auto select_final_indices_info = std::dynamic_pointer_cast(chunk.getChunkInfo())) - { - const auto & index_column = select_final_indices_info->select_final_indices; - chunk.setChunkInfo(nullptr); - if (index_column && index_column->size() != chunk.getNumRows()) - return select_final_indices_info; - } - return nullptr; -} - -static void -executeSelectByIndices(Columns & columns, std::shared_ptr & select_final_indices_info, size_t & num_rows) -{ - if (select_final_indices_info) - { - const auto & index_column = select_final_indices_info->select_final_indices; - - for (auto & column : columns) - column = column->index(*index_column, 0); - - num_rows = index_column->size(); - } -} - -static std::unique_ptr combineFilterAndIndices( - std::unique_ptr description, - std::shared_ptr & select_final_indices_info, - size_t num_rows) -{ - if (select_final_indices_info) - { - const auto * index_column = select_final_indices_info->select_final_indices; - - if (description->hasOne()) - { - const auto & selected_by_indices = index_column->getData(); - const auto * 
selected_by_filter = description->data->data(); - /// We will recompute new has_one - description->has_one = 0; - /// At this point we know that the filter is not constant, just create a new filter - auto mutable_holder = ColumnUInt8::create(num_rows, 0); - auto & data = mutable_holder->getData(); - for (auto idx : selected_by_indices) - { - if (idx >= num_rows) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index {} out of range {}", idx, num_rows); - data[idx] = 1; - } - - /// AND two filters - auto * begin = data.data(); - const auto * end = begin + num_rows; -#if defined(__AVX2__) - while (end - begin >= 32) - { - _mm256_storeu_si256( - reinterpret_cast<__m256i *>(begin), - _mm256_and_si256( - _mm256_loadu_si256(reinterpret_cast(begin)), - _mm256_loadu_si256(reinterpret_cast(selected_by_filter)))); - description->has_one |= !memoryIsZero(begin, 0, 32); - begin += 32; - selected_by_filter += 32; - } -#elif defined(__SSE2__) - while (end - begin >= 16) - { - _mm_storeu_si128( - reinterpret_cast<__m128i *>(begin), - _mm_and_si128( - _mm_loadu_si128(reinterpret_cast(begin)), - _mm_loadu_si128(reinterpret_cast(selected_by_filter)))); - description->has_one |= !memoryIsZero(begin, 0, 16); - begin += 16; - selected_by_filter += 16; - } -#endif - - while (end - begin >= 8) - { - *reinterpret_cast(begin) &= *reinterpret_cast(selected_by_filter); - description->has_one |= *reinterpret_cast(begin); - begin += 8; - selected_by_filter += 8; - } - - while (end - begin > 0) - { - *begin &= *selected_by_filter; - description->has_one |= *begin; - begin++; - selected_by_filter++; - } - - description->data_holder = std::move(mutable_holder); - description->data = &data; - } - } - return std::move(description); -} - -static std::unique_ptr combineFilterAndIndices( - std::unique_ptr description, - std::shared_ptr & select_final_indices_info, - size_t num_rows) -{ - /// Iterator interface to decorate data from output of std::set_intersection - struct Iterator - { - UInt8 * data; - 
Int64 & pop_cnt; - explicit Iterator(UInt8 * data_, Int64 & pop_cnt_) : data(data_), pop_cnt(pop_cnt_) {} - Iterator & operator = (UInt64 index) { data[index] = 1; ++pop_cnt; return *this; } - Iterator & operator ++ () { return *this; } - Iterator & operator * () { return *this; } - }; - - if (select_final_indices_info) - { - const auto * index_column = select_final_indices_info->select_final_indices; - - if (description->hasOne()) - { - std::unique_ptr res; - res->has_one = 0; - const auto & selected_by_indices = index_column->getData(); - const auto & selected_by_filter = description->filter_indices->getData(); - auto mutable_holder = ColumnUInt8::create(num_rows, 0); - auto & data = mutable_holder->getData(); - Iterator decorator(data.data(), res->has_one); - std::set_intersection(selected_by_indices.begin(), selected_by_indices.end(), selected_by_filter.begin(), selected_by_filter.end(), decorator); - res->data_holder = std::move(mutable_holder); - res->data = &data; - return res; - } - } - return std::move(description); -} - Block FilterTransform::transformHeader( const Block & header, const ActionsDAG * expression, const String & filter_column_name, bool remove_filter_column) { @@ -267,7 +125,6 @@ void FilterTransform::doTransform(Chunk & chunk) size_t num_rows_before_filtration = chunk.getNumRows(); auto columns = chunk.detachColumns(); DataTypes types; - auto select_final_indices_info = getSelectByFinalIndices(chunk); { Block block = getInputPort().getHeader().cloneWithColumns(columns); @@ -282,7 +139,6 @@ void FilterTransform::doTransform(Chunk & chunk) if (constant_filter_description.always_true || on_totals) { - executeSelectByIndices(columns, select_final_indices_info, num_rows_before_filtration); chunk.setColumns(std::move(columns), num_rows_before_filtration); removeFilterIfNeed(chunk); return; @@ -303,7 +159,6 @@ void FilterTransform::doTransform(Chunk & chunk) if (constant_filter_description.always_true) { - executeSelectByIndices(columns, 
select_final_indices_info, num_rows_before_filtration); chunk.setColumns(std::move(columns), num_rows_before_filtration); removeFilterIfNeed(chunk); return; @@ -311,15 +166,9 @@ void FilterTransform::doTransform(Chunk & chunk) std::unique_ptr filter_description; if (filter_column->isSparse()) - filter_description = combineFilterAndIndices( - std::make_unique(*filter_column), select_final_indices_info, num_rows_before_filtration); + filter_description = std::make_unique(*filter_column); else - filter_description = combineFilterAndIndices( - std::make_unique(*filter_column), select_final_indices_info, num_rows_before_filtration); - - - if (!filter_description->has_one) - return; + filter_description = std::make_unique(*filter_column); /** Let's find out how many rows will be in result. * To do this, we filter out the first non-constant column diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 159a3244fe9..fb3b2faa9c5 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -19,6 +18,7 @@ #include #include #include +#include #include @@ -40,7 +40,7 @@ FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns) desc.reserve(columns.size()); for (const auto & name : columns) desc.emplace_back(name); - return std::make_unique(materializeBlock(block), desc); + return std::make_unique(block, desc); } template @@ -234,9 +234,14 @@ void inline addMany(PaddedPODArray & left_or_right_map, size_t idx, size for (size_t i = 0; i < num; ++i) left_or_right_map.push_back(idx); } - } +FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_) + : sample_block(materializeBlock(sample_block_).cloneEmpty()), desc(description_) +{ +} + + const Chunk & FullMergeJoinCursor::getCurrent() const { return current_chunk; @@ 
-260,6 +265,10 @@ void FullMergeJoinCursor::setChunk(Chunk && chunk) return; } + // should match the structure of sample_block (after materialization) + convertToFullIfConst(chunk); + convertToFullIfSparse(chunk); + current_chunk = std::move(chunk); cursor = SortCursorImpl(sample_block, current_chunk.getColumns(), desc); } diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index cf9331abd59..5ca6b076544 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -193,11 +193,7 @@ private: class FullMergeJoinCursor : boost::noncopyable { public: - explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_) - : sample_block(sample_block_.cloneEmpty()) - , desc(description_) - { - } + explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_); bool fullyCompleted() const; void setChunk(Chunk && chunk); diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp new file mode 100644 index 00000000000..0f433165f14 --- /dev/null +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -0,0 +1,44 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +PlanSquashingTransform::PlanSquashingTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : IInflatingTransform(header, header), squashing(header, min_block_size_rows, min_block_size_bytes) +{ +} + +void PlanSquashingTransform::consume(Chunk chunk) +{ + if (Chunk current_chunk = squashing.add(std::move(chunk)); current_chunk.hasChunkInfo()) + squashed_chunk.swap(current_chunk); +} + +Chunk PlanSquashingTransform::generate() +{ + if (!squashed_chunk.hasChunkInfo()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); + + Chunk 
result_chunk; + result_chunk.swap(squashed_chunk); + return result_chunk; +} + +bool PlanSquashingTransform::canGenerate() +{ + return squashed_chunk.hasChunkInfo(); +} + +Chunk PlanSquashingTransform::getRemaining() +{ + Chunk current_chunk = squashing.flush(); + return current_chunk; +} +} diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h new file mode 100644 index 00000000000..4ad2ec2d089 --- /dev/null +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class PlanSquashingTransform : public IInflatingTransform +{ +public: + PlanSquashingTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + + String getName() const override { return "PlanSquashingTransform"; } + +protected: + void consume(Chunk chunk) override; + bool canGenerate() override; + Chunk generate() override; + Chunk getRemaining() override; + +private: + Squashing squashing; + Chunk squashed_chunk; + Chunk finish_chunk; +}; +} + diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp deleted file mode 100644 index ed67dd508f3..00000000000 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ /dev/null @@ -1,94 +0,0 @@ -#include - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int LOGICAL_ERROR; -} - -SquashingChunksTransform::SquashingChunksTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) -{ -} - -void SquashingChunksTransform::onConsume(Chunk chunk) -{ - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) - { - cur_chunk.setColumns(block.getColumns(), block.rows()); - } -} - -SquashingChunksTransform::GenerateResult 
SquashingChunksTransform::onGenerate() -{ - GenerateResult res; - res.chunk = std::move(cur_chunk); - res.is_done = true; - return res; -} - -void SquashingChunksTransform::onFinish() -{ - auto block = squashing.add({}); - finish_chunk.setColumns(block.getColumns(), block.rows()); -} - -void SquashingChunksTransform::work() -{ - if (stage == Stage::Exception) - { - data.chunk.clear(); - ready_input = false; - return; - } - - ExceptionKeepingTransform::work(); - if (finish_chunk) - { - data.chunk = std::move(finish_chunk); - ready_output = true; - } -} - -SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes) -{ -} - -void SimpleSquashingChunksTransform::consume(Chunk chunk) -{ - Block current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); - squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); -} - -Chunk SimpleSquashingChunksTransform::generate() -{ - if (squashed_chunk.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); - - Chunk result_chunk; - result_chunk.swap(squashed_chunk); - return result_chunk; -} - -bool SimpleSquashingChunksTransform::canGenerate() -{ - return !squashed_chunk.empty(); -} - -Chunk SimpleSquashingChunksTransform::getRemaining() -{ - Block current_block = squashing.add({}); - squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); - - Chunk result_chunk; - result_chunk.swap(squashed_chunk); - return result_chunk; -} - -} diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp new file mode 100644 index 00000000000..34b733cde5e --- /dev/null +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -0,0 +1,108 @@ +#include +#include + +namespace DB +{ + 
+namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +SquashingTransform::SquashingTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : ExceptionKeepingTransform(header, header, false) + , squashing(header, min_block_size_rows, min_block_size_bytes) +{ +} + +void SquashingTransform::onConsume(Chunk chunk) +{ + Chunk planned_chunk = squashing.add(std::move(chunk)); + if (planned_chunk.hasChunkInfo()) + cur_chunk = DB::Squashing::squash(std::move(planned_chunk)); +} + +SquashingTransform::GenerateResult SquashingTransform::onGenerate() +{ + GenerateResult res; + res.chunk = std::move(cur_chunk); + res.is_done = true; + return res; +} + +void SquashingTransform::onFinish() +{ + Chunk chunk = squashing.flush(); + if (chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(chunk)); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); +} + +void SquashingTransform::work() +{ + if (stage == Stage::Exception) + { + data.chunk.clear(); + ready_input = false; + return; + } + + ExceptionKeepingTransform::work(); + if (finish_chunk) + { + data.chunk = std::move(finish_chunk); + ready_output = true; + } +} + +SimpleSquashingTransform::SimpleSquashingTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : ISimpleTransform(header, header, false) + , squashing(header, min_block_size_rows, min_block_size_bytes) +{ +} + +void SimpleSquashingTransform::transform(Chunk & chunk) +{ + if (!finished) + { + Chunk planned_chunk = squashing.add(std::move(chunk)); + if (planned_chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(planned_chunk)); + } + else + { + if (chunk.hasRows()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); + + chunk = squashing.flush(); + if (chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(chunk)); + } +} + +IProcessor::Status SimpleSquashingTransform::prepare() +{ + if 
(!finished && input.isFinished()) + { + if (output.isFinished()) + return Status::Finished; + + if (!output.canPush()) + return Status::PortFull; + + if (has_output) + { + output.pushData(std::move(output_data)); + has_output = false; + return Status::PortFull; + } + + finished = true; + /// On the next call to transform() we will return all data buffered in `squashing` (if any) + return Status::Ready; + } + return ISimpleTransform::prepare(); +} +} diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingTransform.h similarity index 54% rename from src/Processors/Transforms/SquashingChunksTransform.h rename to src/Processors/Transforms/SquashingTransform.h index 8c30a6032e4..c5b727ac6ec 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -1,17 +1,17 @@ #pragma once -#include +#include #include -#include #include +#include namespace DB { -class SquashingChunksTransform : public ExceptionKeepingTransform +class SquashingTransform : public ExceptionKeepingTransform { public: - explicit SquashingChunksTransform( + explicit SquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "SquashingTransform"; } @@ -24,28 +24,27 @@ protected: void onFinish() override; private: - SquashingTransform squashing; + Squashing squashing; Chunk cur_chunk; Chunk finish_chunk; }; /// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. 
-class SimpleSquashingChunksTransform : public IInflatingTransform +class SimpleSquashingTransform : public ISimpleTransform { public: - explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + explicit SimpleSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "SimpleSquashingTransform"; } protected: - void consume(Chunk chunk) override; - bool canGenerate() override; - Chunk generate() override; - Chunk getRemaining() override; + void transform(Chunk &) override; + + IProcessor::Status prepare() override; private: - SquashingTransform squashing; - Chunk squashed_chunk; -}; + Squashing squashing; + bool finished = false; +}; } diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index af340c4aab8..b9f61d30182 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -37,7 +37,7 @@ struct fmt::formatter } template - auto format(const DB::RowNumber & x, FormatContext & ctx) + auto format(const DB::RowNumber & x, FormatContext & ctx) const { return fmt::format_to(ctx.out(), "{}:{}", x.block, x.row); } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index a1a886fb4f7..25fbf13b0e7 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -6,7 +6,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -371,7 +372,7 @@ std::optional generateViewChain( bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared( + out.addSource(std::make_shared( out.getInputHeader(), table_prefers_large_blocks ? 
settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); @@ -622,7 +623,7 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat /// Squashing is needed here because the materialized view query can generate a lot of blocks /// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY /// and two-level aggregation is triggered). - pipeline.addTransform(std::make_shared( + pipeline.addTransform(std::make_shared( pipeline.getHeader(), context->getSettingsRef().min_insert_block_size_rows, context->getSettingsRef().min_insert_block_size_bytes)); diff --git a/src/QueryPipeline/SizeLimits.cpp b/src/QueryPipeline/SizeLimits.cpp index 76832b1f951..4161f3f365f 100644 --- a/src/QueryPipeline/SizeLimits.cpp +++ b/src/QueryPipeline/SizeLimits.cpp @@ -2,7 +2,6 @@ #include #include #include -#include namespace ProfileEvents diff --git a/src/Server/CertificateReloader.cpp b/src/Server/CertificateReloader.cpp index 98d7a362bd7..df7b6e7fbd7 100644 --- a/src/Server/CertificateReloader.cpp +++ b/src/Server/CertificateReloader.cpp @@ -15,18 +15,23 @@ namespace DB namespace { + /// Call set process for certificate. -int callSetCertificate(SSL * ssl, [[maybe_unused]] void * arg) +int callSetCertificate(SSL * ssl, void * arg) { - return CertificateReloader::instance().setCertificate(ssl); + if (!arg) + return -1; + + const CertificateReloader::MultiData * pdata = reinterpret_cast(arg); + return CertificateReloader::instance().setCertificate(ssl, pdata); } } /// This is callback for OpenSSL. It will be called on every connection to obtain a certificate and private key. 
-int CertificateReloader::setCertificate(SSL * ssl) +int CertificateReloader::setCertificate(SSL * ssl, const CertificateReloader::MultiData * pdata) { - auto current = data.get(); + auto current = pdata->data.get(); if (!current) return -1; @@ -65,24 +70,54 @@ int CertificateReloader::setCertificate(SSL * ssl) } -void CertificateReloader::init() +void CertificateReloader::init(MultiData * pdata) { LOG_DEBUG(log, "Initializing certificate reloader."); /// Set a callback for OpenSSL to allow get the updated cert and key. - auto* ctx = Poco::Net::SSLManager::instance().defaultServerContext()->sslContext(); - SSL_CTX_set_cert_cb(ctx, callSetCertificate, nullptr); - init_was_not_made = false; + SSL_CTX_set_cert_cb(pdata->ctx, callSetCertificate, reinterpret_cast(pdata)); + pdata->init_was_not_made = false; } void CertificateReloader::tryLoad(const Poco::Util::AbstractConfiguration & config) +{ + tryLoad(config, nullptr, Poco::Net::SSLManager::CFG_SERVER_PREFIX); +} + + +void CertificateReloader::tryLoad(const Poco::Util::AbstractConfiguration & config, SSL_CTX * ctx, const std::string & prefix) +{ + std::lock_guard lock{data_mutex}; + tryLoadImpl(config, ctx, prefix); +} + + +std::list::iterator CertificateReloader::findOrInsert(SSL_CTX * ctx, const std::string & prefix) +{ + auto it = data.end(); + auto i = data_index.find(prefix); + if (i != data_index.end()) + it = i->second; + else + { + if (!ctx) + ctx = Poco::Net::SSLManager::instance().defaultServerContext()->sslContext(); + data.push_back(MultiData(ctx)); + --it; + data_index[prefix] = it; + } + return it; +} + + +void CertificateReloader::tryLoadImpl(const Poco::Util::AbstractConfiguration & config, SSL_CTX * ctx, const std::string & prefix) { /// If at least one of the files is modified - recreate - std::string new_cert_path = config.getString("openSSL.server.certificateFile", ""); - std::string new_key_path = config.getString("openSSL.server.privateKeyFile", ""); + std::string new_cert_path = 
config.getString(prefix + "certificateFile", ""); + std::string new_key_path = config.getString(prefix + "privateKeyFile", ""); /// For empty paths (that means, that user doesn't want to use certificates) /// no processing required @@ -93,32 +128,41 @@ void CertificateReloader::tryLoad(const Poco::Util::AbstractConfiguration & conf } else { - bool cert_file_changed = cert_file.changeIfModified(std::move(new_cert_path), log); - bool key_file_changed = key_file.changeIfModified(std::move(new_key_path), log); - std::string pass_phrase = config.getString("openSSL.server.privateKeyPassphraseHandler.options.password", ""); - - if (cert_file_changed || key_file_changed) - { - LOG_DEBUG(log, "Reloading certificate ({}) and key ({}).", cert_file.path, key_file.path); - data.set(std::make_unique(cert_file.path, key_file.path, pass_phrase)); - LOG_INFO(log, "Reloaded certificate ({}) and key ({}).", cert_file.path, key_file.path); - } - - /// If callback is not set yet try { - if (init_was_not_made) - init(); + auto it = findOrInsert(ctx, prefix); + + bool cert_file_changed = it->cert_file.changeIfModified(std::move(new_cert_path), log); + bool key_file_changed = it->key_file.changeIfModified(std::move(new_key_path), log); + + if (cert_file_changed || key_file_changed) + { + LOG_DEBUG(log, "Reloading certificate ({}) and key ({}).", it->cert_file.path, it->key_file.path); + std::string pass_phrase = config.getString(prefix + "privateKeyPassphraseHandler.options.password", ""); + it->data.set(std::make_unique(it->cert_file.path, it->key_file.path, pass_phrase)); + LOG_INFO(log, "Reloaded certificate ({}) and key ({}).", it->cert_file.path, it->key_file.path); + } + + /// If callback is not set yet + if (it->init_was_not_made) + init(&*it); } catch (...) 
{ - init_was_not_made = true; LOG_ERROR(log, getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false)); } } } +void CertificateReloader::tryReloadAll(const Poco::Util::AbstractConfiguration & config) +{ + std::lock_guard lock{data_mutex}; + for (auto & item : data_index) + tryLoadImpl(config, item.second->ctx, item.first); +} + + CertificateReloader::Data::Data(std::string cert_path, std::string key_path, std::string pass_phrase) : certs_chain(Poco::Crypto::X509Certificate::readPEM(cert_path)), key(/* public key */ "", /* private key */ key_path, pass_phrase) { diff --git a/src/Server/CertificateReloader.h b/src/Server/CertificateReloader.h index 5ab799037d5..7472d2f6baa 100644 --- a/src/Server/CertificateReloader.h +++ b/src/Server/CertificateReloader.h @@ -6,6 +6,9 @@ #include #include +#include +#include +#include #include #include @@ -31,28 +34,13 @@ class CertificateReloader public: using stat_t = struct stat; - /// Singleton - CertificateReloader(CertificateReloader const &) = delete; - void operator=(CertificateReloader const &) = delete; - static CertificateReloader & instance() + struct Data { - static CertificateReloader instance; - return instance; - } + Poco::Crypto::X509Certificate::List certs_chain; + Poco::Crypto::EVPPKey key; - /// Initialize the callback and perform the initial cert loading - void init(); - - /// Handle configuration reload - void tryLoad(const Poco::Util::AbstractConfiguration & config); - - /// A callback for OpenSSL - int setCertificate(SSL * ssl); - -private: - CertificateReloader() = default; - - LoggerPtr log = getLogger("CertificateReloader"); + Data(std::string cert_path, std::string key_path, std::string pass_phrase); + }; struct File { @@ -65,19 +53,55 @@ private: bool changeIfModified(std::string new_path, LoggerPtr logger); }; - File cert_file{"certificate"}; - File key_file{"key"}; - - struct Data + struct MultiData { - Poco::Crypto::X509Certificate::List certs_chain; - Poco::Crypto::EVPPKey key; + SSL_CTX * 
ctx = nullptr; + MultiVersion data; + bool init_was_not_made = true; - Data(std::string cert_path, std::string key_path, std::string pass_phrase); + File cert_file{"certificate"}; + File key_file{"key"}; + + explicit MultiData(SSL_CTX * ctx_) : ctx(ctx_) {} }; - MultiVersion data; - bool init_was_not_made = true; + /// Singleton + CertificateReloader(CertificateReloader const &) = delete; + void operator=(CertificateReloader const &) = delete; + static CertificateReloader & instance() + { + static CertificateReloader instance; + return instance; + } + + /// Handle configuration reload for default path + void tryLoad(const Poco::Util::AbstractConfiguration & config); + + /// Handle configuration reload + void tryLoad(const Poco::Util::AbstractConfiguration & config, SSL_CTX * ctx, const std::string & prefix); + + /// Handle configuration reload for all contexts + void tryReloadAll(const Poco::Util::AbstractConfiguration & config); + + /// A callback for OpenSSL + int setCertificate(SSL * ssl, const MultiData * pdata); + +private: + CertificateReloader() = default; + + /// Initialize the callback and perform the initial cert loading + void init(MultiData * pdata) TSA_REQUIRES(data_mutex); + + /// Unsafe implementation + void tryLoadImpl(const Poco::Util::AbstractConfiguration & config, SSL_CTX * ctx, const std::string & prefix) TSA_REQUIRES(data_mutex); + + std::list::iterator findOrInsert(SSL_CTX * ctx, const std::string & prefix) TSA_REQUIRES(data_mutex); + + LoggerPtr log = getLogger("CertificateReloader"); + + std::list data TSA_GUARDED_BY(data_mutex); + std::unordered_map::iterator> data_index TSA_GUARDED_BY(data_mutex); + mutable std::mutex data_mutex; }; } diff --git a/src/Server/CloudPlacementInfo.cpp b/src/Server/CloudPlacementInfo.cpp index 0790f825a45..d8810bb30de 100644 --- a/src/Server/CloudPlacementInfo.cpp +++ b/src/Server/CloudPlacementInfo.cpp @@ -11,6 +11,11 @@ namespace DB { +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + namespace 
PlacementInfo { @@ -46,7 +51,15 @@ PlacementInfo & PlacementInfo::instance() } void PlacementInfo::initialize(const Poco::Util::AbstractConfiguration & config) +try { + if (!config.has(DB::PlacementInfo::PLACEMENT_CONFIG_PREFIX)) + { + availability_zone = ""; + initialized = true; + return; + } + use_imds = config.getBool(getConfigPath("use_imds"), false); if (use_imds) @@ -67,14 +80,17 @@ void PlacementInfo::initialize(const Poco::Util::AbstractConfiguration & config) LOG_DEBUG(log, "Loaded info: availability_zone: {}", availability_zone); initialized = true; } +catch (...) +{ + tryLogCurrentException("Failed to get availability zone"); + availability_zone = ""; + initialized = true; +} std::string PlacementInfo::getAvailabilityZone() const { if (!initialized) - { - LOG_WARNING(log, "Placement info has not been loaded"); - return ""; - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Placement info has not been loaded"); return availability_zone; } diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index 8098671a903..e2098b284bf 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -162,7 +162,8 @@ WriteBufferFromHTTPServerResponse::~WriteBufferFromHTTPServerResponse() { try { - finalize(); + if (!canceled) + finalize(); } catch (...) 
{ diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index d1db4cb3951..a00f6fb5412 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -38,12 +37,15 @@ #include #include #include +#include +#include #include "config.h" #include #include #include +#include #include #include #include @@ -53,7 +55,10 @@ #include #include #include +#include #include +#include +#include #if USE_SSL #include @@ -67,6 +72,8 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; + extern const int CANNOT_COMPILE_REGEXP; + extern const int CANNOT_OPEN_FILE; extern const int CANNOT_PARSE_TEXT; extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; extern const int CANNOT_PARSE_QUOTED_STRING; @@ -78,8 +85,7 @@ namespace ErrorCodes extern const int CANNOT_PARSE_IPV6; extern const int CANNOT_PARSE_UUID; extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_COMPILE_REGEXP; + extern const int CANNOT_SCHEDULE_TASK; extern const int DUPLICATE_COLUMN; extern const int ILLEGAL_COLUMN; extern const int THERE_IS_NO_COLUMN; @@ -267,6 +273,10 @@ static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int excepti { return HTTPResponse::HTTP_REQUEST_TIMEOUT; } + else if (exception_code == ErrorCodes::CANNOT_SCHEDULE_TASK) + { + return HTTPResponse::HTTP_SERVICE_UNAVAILABLE; + } return HTTPResponse::HTTP_INTERNAL_SERVER_ERROR; } @@ -333,11 +343,11 @@ void HTTPHandler::pushDelayedResults(Output & used_output) } -HTTPHandler::HTTPHandler(IServer & server_, const std::string & name, const std::optional & content_type_override_) +HTTPHandler::HTTPHandler(IServer & server_, const std::string & name, const HTTPResponseHeaderSetup & http_response_headers_override_) : server(server_) , log(getLogger(name)) , default_settings(server.context()->getSettingsRef()) - , 
content_type_override(content_type_override_) + , http_response_headers_override(http_response_headers_override_) { server_display_name = server.config().getString("display_name", getFQDNOrHostName()); } @@ -372,7 +382,7 @@ bool HTTPHandler::authenticateUser( bool has_credentials_in_query_params = params.has("user") || params.has("password"); std::string spnego_challenge; - std::string certificate_common_name; + SSLCertificateSubjects certificate_subjects; if (has_auth_headers) { @@ -395,11 +405,11 @@ bool HTTPHandler::authenticateUser( "to use SSL certificate authentication and authentication via password simultaneously"); if (request.havePeerCertificate()) - certificate_common_name = request.peerCertificate().commonName(); + certificate_subjects = extractSSLCertificateSubjects(request.peerCertificate()); - if (certificate_common_name.empty()) + if (certificate_subjects.empty()) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name"); + "Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name or Subject Alternative Name"); #else throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL certificate authentication disabled because ClickHouse was built without SSL library"); @@ -443,10 +453,10 @@ bool HTTPHandler::authenticateUser( password = params.get("password", ""); } - if (!certificate_common_name.empty()) + if (!certificate_subjects.empty()) { if (!request_credentials) - request_credentials = std::make_unique(user, certificate_common_name); + request_credentials = std::make_unique(user, std::move(certificate_subjects)); auto * certificate_credentials = dynamic_cast(request_credentials.get()); if (!certificate_credentials) @@ -665,8 +675,7 @@ void HTTPHandler::processQuery( { auto tmp_data = std::make_shared(server.context()->getTempDataOnDisk()); - auto create_tmp_disk_buffer = [tmp_data] (const WriteBufferPtr &) -> 
WriteBufferPtr - { + auto create_tmp_disk_buffer = [tmp_data] (const WriteBufferPtr &) -> WriteBufferPtr { return tmp_data->createRawStream(); }; @@ -888,13 +897,14 @@ void HTTPHandler::processQuery( customizeContext(request, context, *in_post_maybe_compressed); in = has_external_data ? std::move(in_param) : std::make_unique(*in_param, *in_post_maybe_compressed); + applyHTTPResponseHeaders(response, http_response_headers_override); + auto set_query_result = [&response, this] (const QueryResultDetails & details) { response.add("X-ClickHouse-Query-Id", details.query_id); - if (content_type_override) - response.setContentType(*content_type_override); - else if (details.content_type) + if (!(http_response_headers_override && http_response_headers_override->contains(Poco::Net::HTTPMessage::CONTENT_TYPE)) + && details.content_type) response.setContentType(*details.content_type); if (details.format) @@ -1019,14 +1029,7 @@ catch (...) { tryLogCurrentException(log, "Cannot send exception to client"); - try - { - used_output.finalize(); - } - catch (...) - { - tryLogCurrentException(log, "Cannot flush data to client (after sending exception)"); - } + used_output.cancel(); } void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) @@ -1038,12 +1041,21 @@ void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest /// FIXME: make sure that no one else is reading from the same stream at the moment. - /// If HTTP method is POST and Keep-Alive is turned on, we should read the whole request body + /// If HTTP method is POST and Keep-Alive is turned on, we should try to read the whole request body /// to avoid reading part of the current request body in the next request. 
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && response.getKeepAlive() - && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED && !request.getStream().eof()) + && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED) { - request.getStream().ignoreAll(); + try + { + if (!request.getStream().eof()) + request.getStream().ignoreAll(); + } + catch (...) + { + tryLogCurrentException(log, "Cannot read remaining request body during exception handling"); + response.setKeepAlive(false); + } } if (exception_code == ErrorCodes::REQUIRED_PASSWORD) @@ -1055,7 +1067,6 @@ void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) { setThreadName("HTTPHandler"); - ThreadStatus thread_status; session = std::make_unique(server.context(), ClientInfo::Interface::HTTP, request.isSecure()); SCOPE_EXIT({ session.reset(); }); @@ -1156,7 +1167,7 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse /// Check if exception was thrown in used_output.finalize(). /// In this case used_output can be in invalid state and we /// cannot write in it anymore. So, just log this exception. 
- if (used_output.isFinalized()) + if (used_output.isFinalized() || used_output.isCanceled()) { if (thread_trace_context) thread_trace_context->root_span.addAttribute("clickhouse.exception", "Cannot flush data to client"); @@ -1175,13 +1186,16 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse if (thread_trace_context) thread_trace_context->root_span.addAttribute(status); + + return; } used_output.finalize(); } -DynamicQueryHandler::DynamicQueryHandler(IServer & server_, const std::string & param_name_, const std::optional& content_type_override_) - : HTTPHandler(server_, "DynamicQueryHandler", content_type_override_), param_name(param_name_) +DynamicQueryHandler::DynamicQueryHandler( + IServer & server_, const std::string & param_name_, const HTTPResponseHeaderSetup & http_response_headers_override_) + : HTTPHandler(server_, "DynamicQueryHandler", http_response_headers_override_), param_name(param_name_) { } @@ -1242,8 +1256,8 @@ PredefinedQueryHandler::PredefinedQueryHandler( const std::string & predefined_query_, const CompiledRegexPtr & url_regex_, const std::unordered_map & header_name_with_regex_, - const std::optional & content_type_override_) - : HTTPHandler(server_, "PredefinedQueryHandler", content_type_override_) + const HTTPResponseHeaderSetup & http_response_headers_override_) + : HTTPHandler(server_, "PredefinedQueryHandler", http_response_headers_override_) , receive_params(receive_params_) , predefined_query(predefined_query_) , url_regex(url_regex_) @@ -1335,14 +1349,10 @@ HTTPRequestHandlerFactoryPtr createDynamicHandlerFactory(IServer & server, { auto query_param_name = config.getString(config_prefix + ".handler.query_param_name", "query"); - std::optional content_type_override; - if (config.has(config_prefix + ".handler.content_type")) - content_type_override = config.getString(config_prefix + ".handler.content_type"); + HTTPResponseHeaderSetup http_response_headers_override = parseHTTPResponseHeaders(config, 
config_prefix); - auto creator = [&server, query_param_name, content_type_override] () -> std::unique_ptr - { - return std::make_unique(server, query_param_name, content_type_override); - }; + auto creator = [&server, query_param_name, http_response_headers_override]() -> std::unique_ptr + { return std::make_unique(server, query_param_name, http_response_headers_override); }; auto factory = std::make_shared>(std::move(creator)); factory->addFiltersFromConfig(config, config_prefix); @@ -1397,9 +1407,7 @@ HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server, headers_name_with_regex.emplace(std::make_pair(header_name, regex)); } - std::optional content_type_override; - if (config.has(config_prefix + ".handler.content_type")) - content_type_override = config.getString(config_prefix + ".handler.content_type"); + HTTPResponseHeaderSetup http_response_headers_override = parseHTTPResponseHeaders(config, config_prefix); std::shared_ptr> factory; @@ -1419,12 +1427,12 @@ HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server, predefined_query, regex, headers_name_with_regex, - content_type_override] + http_response_headers_override] -> std::unique_ptr { return std::make_unique( server, analyze_receive_params, predefined_query, regex, - headers_name_with_regex, content_type_override); + headers_name_with_regex, http_response_headers_override); }; factory = std::make_shared>(std::move(creator)); factory->addFiltersFromConfig(config, config_prefix); @@ -1437,12 +1445,12 @@ HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server, analyze_receive_params, predefined_query, headers_name_with_regex, - content_type_override] + http_response_headers_override] -> std::unique_ptr { return std::make_unique( server, analyze_receive_params, predefined_query, CompiledRegexPtr{}, - headers_name_with_regex, content_type_override); + headers_name_with_regex, http_response_headers_override); }; factory = 
std::make_shared>(std::move(creator)); diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index a96402247a2..c78c45826f0 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -1,5 +1,8 @@ #pragma once +#include +#include +#include #include #include #include @@ -10,6 +13,8 @@ #include #include +#include "HTTPResponseHeaderWriter.h" + namespace CurrentMetrics { extern const Metric HTTPConnection; @@ -31,7 +36,7 @@ using CompiledRegexPtr = std::shared_ptr; class HTTPHandler : public HTTPRequestHandler { public: - HTTPHandler(IServer & server_, const std::string & name, const std::optional & content_type_override_); + HTTPHandler(IServer & server_, const std::string & name, const HTTPResponseHeaderSetup & http_response_headers_override_); ~HTTPHandler() override; void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; @@ -73,6 +78,7 @@ private: WriteBuffer * out_maybe_delayed_and_compressed = nullptr; bool finalized = false; + bool canceled = false; bool exception_is_written = false; std::function exception_writer; @@ -94,6 +100,24 @@ private: out->finalize(); } + void cancel() + { + if (canceled) + return; + canceled = true; + + if (out_compressed_holder) + out_compressed_holder->cancel(); + if (out) + out->cancel(); + } + + + bool isCanceled() const + { + return canceled; + } + bool isFinalized() const { return finalized; @@ -113,8 +137,8 @@ private: /// See settings http_max_fields, http_max_field_name_size, http_max_field_value_size in HTMLForm. const Settings & default_settings; - /// Overrides Content-Type provided by the format of the response. - std::optional content_type_override; + /// Overrides for response headers. + HTTPResponseHeaderSetup http_response_headers_override; // session is reset at the end of each request/response. 
std::unique_ptr session; @@ -162,8 +186,12 @@ class DynamicQueryHandler : public HTTPHandler { private: std::string param_name; + public: - explicit DynamicQueryHandler(IServer & server_, const std::string & param_name_ = "query", const std::optional& content_type_override_ = std::nullopt); + explicit DynamicQueryHandler( + IServer & server_, + const std::string & param_name_ = "query", + const HTTPResponseHeaderSetup & http_response_headers_override_ = std::nullopt); std::string getQuery(HTTPServerRequest & request, HTMLForm & params, ContextMutablePtr context) override; @@ -177,11 +205,15 @@ private: std::string predefined_query; CompiledRegexPtr url_regex; std::unordered_map header_name_with_capture_regex; + public: PredefinedQueryHandler( - IServer & server_, const NameSet & receive_params_, const std::string & predefined_query_ - , const CompiledRegexPtr & url_regex_, const std::unordered_map & header_name_with_regex_ - , const std::optional & content_type_override_); + IServer & server_, + const NameSet & receive_params_, + const std::string & predefined_query_, + const CompiledRegexPtr & url_regex_, + const std::unordered_map & header_name_with_regex_, + const HTTPResponseHeaderSetup & http_response_headers_override_ = std::nullopt); void customizeContext(HTTPServerRequest & request, ContextMutablePtr context, ReadBuffer & body) override; diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index 9a67e576345..5344b2d024b 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -74,7 +74,8 @@ static auto createPingHandlerFactory(IServer & server) auto creator = [&server]() -> std::unique_ptr { constexpr auto ping_response_expression = "Ok.\n"; - return std::make_unique(server, ping_response_expression); + return std::make_unique( + server, ping_response_expression, parseHTTPResponseHeaders("text/html; charset=UTF-8")); }; return std::make_shared>(std::move(creator)); } @@ -214,7 +215,8 @@ void 
addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IS auto root_creator = [&server]() -> std::unique_ptr { constexpr auto root_response_expression = "config://http_server_default_response"; - return std::make_unique(server, root_response_expression); + return std::make_unique( + server, root_response_expression, parseHTTPResponseHeaders("text/html; charset=UTF-8")); }; auto root_handler = std::make_shared>(std::move(root_creator)); root_handler->attachStrictPath("/"); diff --git a/src/Server/HTTPResponseHeaderWriter.cpp b/src/Server/HTTPResponseHeaderWriter.cpp new file mode 100644 index 00000000000..fd29af5bdc7 --- /dev/null +++ b/src/Server/HTTPResponseHeaderWriter.cpp @@ -0,0 +1,69 @@ +#include "HTTPResponseHeaderWriter.h" +#include +#include +#include + +namespace DB +{ + +std::unordered_map +baseParseHTTPResponseHeaders(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) +{ + std::unordered_map http_response_headers_override; + String http_response_headers_key = config_prefix + ".handler.http_response_headers"; + String http_response_headers_key_prefix = http_response_headers_key + "."; + if (config.has(http_response_headers_key)) + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(http_response_headers_key, keys); + for (const auto & key : keys) + { + http_response_headers_override[key] = config.getString(http_response_headers_key_prefix + key); + } + } + if (config.has(config_prefix + ".handler.content_type")) + http_response_headers_override[Poco::Net::HTTPMessage::CONTENT_TYPE] = config.getString(config_prefix + ".handler.content_type"); + + return http_response_headers_override; +} + +HTTPResponseHeaderSetup parseHTTPResponseHeaders(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) +{ + std::unordered_map http_response_headers_override = baseParseHTTPResponseHeaders(config, config_prefix); + + if (http_response_headers_override.empty()) + return {}; + + 
return std::move(http_response_headers_override); +} + +std::unordered_map parseHTTPResponseHeaders( + const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_content_type) +{ + std::unordered_map http_response_headers_override = baseParseHTTPResponseHeaders(config, config_prefix); + + if (!http_response_headers_override.contains(Poco::Net::HTTPMessage::CONTENT_TYPE)) + http_response_headers_override[Poco::Net::HTTPMessage::CONTENT_TYPE] = default_content_type; + + return http_response_headers_override; +} + +std::unordered_map parseHTTPResponseHeaders(const std::string & default_content_type) +{ + return {{{Poco::Net::HTTPMessage::CONTENT_TYPE, default_content_type}}}; +} + +void applyHTTPResponseHeaders(Poco::Net::HTTPResponse & response, const HTTPResponseHeaderSetup & setup) +{ + if (setup) + for (const auto & [header_name, header_value] : *setup) + response.set(header_name, header_value); +} + +void applyHTTPResponseHeaders(Poco::Net::HTTPResponse & response, const std::unordered_map & setup) +{ + for (const auto & [header_name, header_value] : setup) + response.set(header_name, header_value); +} + +} diff --git a/src/Server/HTTPResponseHeaderWriter.h b/src/Server/HTTPResponseHeaderWriter.h new file mode 100644 index 00000000000..06281abb42d --- /dev/null +++ b/src/Server/HTTPResponseHeaderWriter.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +using HTTPResponseHeaderSetup = std::optional>; + +HTTPResponseHeaderSetup parseHTTPResponseHeaders(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); + +std::unordered_map parseHTTPResponseHeaders( + const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_content_type); + +std::unordered_map parseHTTPResponseHeaders(const std::string & default_content_type); + +void 
applyHTTPResponseHeaders(Poco::Net::HTTPResponse & response, const HTTPResponseHeaderSetup & setup); + +void applyHTTPResponseHeaders(Poco::Net::HTTPResponse & response, const std::unordered_map & setup); +} diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 0d79aaa227b..45c28babe3a 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -81,7 +80,6 @@ void InterserverIOHTTPHandler::processQuery(HTTPServerRequest & request, HTTPSer void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) { setThreadName("IntersrvHandler"); - ThreadStatus thread_status; /// In order to work keep-alive. if (request.getVersion() == HTTPServerRequest::HTTP_1_1) diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 6709cd298e5..47064b467e7 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -13,11 +13,9 @@ #include #include #include -#include #include #include #include -#include #include #include #include @@ -30,6 +28,11 @@ #include #endif +namespace ProfileEvents +{ + extern const Event KeeperTotalElapsedMicroseconds; +} + namespace DB { @@ -309,7 +312,6 @@ Poco::Timespan KeeperTCPHandler::receiveHandshake(int32_t handshake_length, bool void KeeperTCPHandler::runImpl() { setThreadName("KeeperHandler"); - ThreadStatus thread_status; socket().setReceiveTimeout(receive_timeout); socket().setSendTimeout(send_timeout); @@ -412,12 +414,12 @@ void KeeperTCPHandler::runImpl() keeper_dispatcher->registerSession(session_id, response_callback); Stopwatch logging_stopwatch; + auto operation_max_ms = keeper_dispatcher->getKeeperContext()->getCoordinationSettings()->log_slow_connection_operation_threshold_ms; auto log_long_operation = [&](const String & operation) { - constexpr 
UInt64 operation_max_ms = 500; auto elapsed_ms = logging_stopwatch.elapsedMilliseconds(); if (operation_max_ms < elapsed_ms) - LOG_TEST(log, "{} for session {} took {} ms", operation, session_id, elapsed_ms); + LOG_INFO(log, "{} for session {} took {} ms", operation, session_id, elapsed_ms); logging_stopwatch.restart(); }; @@ -612,11 +614,13 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response /// update statistics ignoring watch response and heartbeat. if (response->xid != Coordination::WATCH_XID && response->getOpNum() != Coordination::OpNum::Heartbeat) { - Int64 elapsed = (Poco::Timestamp() - operations[response->xid]) / 1000; - conn_stats.updateLatency(elapsed); + Int64 elapsed = (Poco::Timestamp() - operations[response->xid]); + ProfileEvents::increment(ProfileEvents::KeeperTotalElapsedMicroseconds, elapsed); + Int64 elapsed_ms = elapsed / 1000; + conn_stats.updateLatency(elapsed_ms); operations.erase(response->xid); - keeper_dispatcher->updateKeeperStatLatency(elapsed); + keeper_dispatcher->updateKeeperStatLatency(elapsed_ms); last_op.set(std::make_unique(LastOp{ .name = Coordination::toString(response->getOpNum()), diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 6456f6d24ff..c0f015bfcd5 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -199,13 +198,16 @@ MySQLHandler::~MySQLHandler() = default; void MySQLHandler::run() { setThreadName("MySQLHandler"); - ThreadStatus thread_status; session = std::make_unique(server.context(), ClientInfo::Interface::MYSQL); SCOPE_EXIT({ session.reset(); }); session->setClientConnectionId(connection_id); + const Settings & settings = server.context()->getSettingsRef(); + socket().setReceiveTimeout(settings.receive_timeout); + socket().setSendTimeout(settings.send_timeout); + in = std::make_shared(socket(), read_event); out = std::make_shared(socket(), 
write_event); packet_endpoint = std::make_shared(*in, *out, sequence_id); @@ -453,6 +455,7 @@ void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol) // Settings replacements if (!should_replace) + { for (auto const & [mysql_setting, clickhouse_setting] : settings_replacements) { const auto replacement_query_opt = setSettingReplacementQuery(query, mysql_setting, clickhouse_setting); @@ -463,6 +466,7 @@ void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol) break; } } + } auto query_context = session->makeQueryContext(); query_context->setCurrentQueryId(fmt::format("mysql:{}:{}", connection_id, toString(UUIDHelpers::generateV4()))); @@ -472,6 +476,10 @@ void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol) settings.prefer_column_name_to_alias = true; query_context->setSettings(settings); + /// Update timeouts + socket().setReceiveTimeout(settings.receive_timeout); + socket().setSendTimeout(settings.send_timeout); + CurrentThread::QueryScope query_scope{query_context}; std::atomic affected_rows {0}; @@ -645,7 +653,11 @@ void MySQLHandlerSSL::finishHandshakeSSL( client_capabilities = ssl_request.capability_flags; max_packet_size = ssl_request.max_packet_size ? 
ssl_request.max_packet_size : MAX_PACKET_LENGTH; secure_connection = true; + ss = std::make_shared(SecureStreamSocket::attach(socket(), SSLManager::instance().defaultServerContext())); + ss->setReceiveTimeout(socket().getReceiveTimeout()); + ss->setSendTimeout(socket().getSendTimeout()); + in = std::make_shared(*ss); out = std::make_shared(*ss); sequence_id = 2; diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index 473d681ddb2..8ba8421e6f0 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -59,7 +58,6 @@ void PostgreSQLHandler::changeIO(Poco::Net::StreamSocket & socket) void PostgreSQLHandler::run() { setThreadName("PostgresHandler"); - ThreadStatus thread_status; session = std::make_unique(server.context(), ClientInfo::Interface::POSTGRESQL); SCOPE_EXIT({ session.reset(); }); diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp index 67bf3875de4..331b7a84857 100644 --- a/src/Server/StaticRequestHandler.cpp +++ b/src/Server/StaticRequestHandler.cpp @@ -2,7 +2,7 @@ #include "IServer.h" #include "HTTPHandlerFactory.h" -#include "HTTPHandlerRequestFilter.h" +#include "HTTPResponseHeaderWriter.h" #include #include @@ -14,6 +14,7 @@ #include +#include #include #include #include @@ -94,7 +95,7 @@ void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServer try { - response.setContentType(content_type); + applyHTTPResponseHeaders(response, http_response_headers_override); if (request.getVersion() == Poco::Net::HTTPServerRequest::HTTP_1_1) response.setChunkedTransferEncoding(true); @@ -155,8 +156,9 @@ void StaticRequestHandler::writeResponse(WriteBuffer & out) writeString(response_expression, out); } -StaticRequestHandler::StaticRequestHandler(IServer & server_, const String & expression, int status_, const String & content_type_) - : server(server_), status(status_), 
content_type(content_type_), response_expression(expression) +StaticRequestHandler::StaticRequestHandler( + IServer & server_, const String & expression, const std::unordered_map & http_response_headers_override_, int status_) + : server(server_), status(status_), http_response_headers_override(http_response_headers_override_), response_expression(expression) { } @@ -166,12 +168,12 @@ HTTPRequestHandlerFactoryPtr createStaticHandlerFactory(IServer & server, { int status = config.getInt(config_prefix + ".handler.status", 200); std::string response_content = config.getRawString(config_prefix + ".handler.response_content", "Ok.\n"); - std::string response_content_type = config.getString(config_prefix + ".handler.content_type", "text/plain; charset=UTF-8"); - auto creator = [&server, response_content, status, response_content_type]() -> std::unique_ptr - { - return std::make_unique(server, response_content, status, response_content_type); - }; + std::unordered_map http_response_headers_override + = parseHTTPResponseHeaders(config, config_prefix, "text/plain; charset=UTF-8"); + + auto creator = [&server, http_response_headers_override, response_content, status]() -> std::unique_ptr + { return std::make_unique(server, response_content, http_response_headers_override, status); }; auto factory = std::make_shared>(std::move(creator)); diff --git a/src/Server/StaticRequestHandler.h b/src/Server/StaticRequestHandler.h index 38d774bb0aa..41fb395d969 100644 --- a/src/Server/StaticRequestHandler.h +++ b/src/Server/StaticRequestHandler.h @@ -1,9 +1,9 @@ #pragma once +#include #include #include - namespace DB { @@ -17,15 +17,16 @@ private: IServer & server; int status; - String content_type; + /// Overrides for response headers. 
+ std::unordered_map http_response_headers_override; String response_expression; public: StaticRequestHandler( IServer & server, const String & expression, - int status_ = 200, - const String & content_type_ = "text/html; charset=UTF-8"); + const std::unordered_map & http_response_headers_override_, + int status_ = 200); void writeResponse(WriteBuffer & out); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index e3a820340ad..a522a3f8782 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1,9 +1,8 @@ -#include "Interpreters/AsynchronousInsertQueue.h" -#include "Interpreters/SquashingTransform.h" -#include "Parsers/ASTInsertQuery.h" +#include +#include +#include #include #include -#include #include #include #include @@ -246,7 +245,6 @@ TCPHandler::~TCPHandler() void TCPHandler::runImpl() { setThreadName("TCPHandler"); - ThreadStatus thread_status; extractConnectionSettingsFromContext(server.context()); @@ -389,7 +387,7 @@ void TCPHandler::runImpl() query_scope.emplace(query_context, /* fatal_error_callback */ [this] { - std::lock_guard lock(fatal_error_mutex); + std::lock_guard lock(out_mutex); sendLogs(); }); @@ -477,7 +475,7 @@ void TCPHandler::runImpl() Stopwatch watch; CurrentMetrics::Increment callback_metric_increment(CurrentMetrics::ReadTaskRequestsSent); - std::lock_guard lock(task_callback_mutex); + std::scoped_lock lock(out_mutex, task_callback_mutex); if (state.cancellation_status == CancellationStatus::FULLY_CANCELLED) return {}; @@ -493,7 +491,7 @@ void TCPHandler::runImpl() { Stopwatch watch; CurrentMetrics::Increment callback_metric_increment(CurrentMetrics::MergeTreeAllRangesAnnouncementsSent); - std::lock_guard lock(task_callback_mutex); + std::scoped_lock lock(out_mutex, task_callback_mutex); if (state.cancellation_status == CancellationStatus::FULLY_CANCELLED) return; @@ -507,7 +505,7 @@ void TCPHandler::runImpl() { Stopwatch watch; CurrentMetrics::Increment 
callback_metric_increment(CurrentMetrics::MergeTreeReadTaskRequestsSent); - std::lock_guard lock(task_callback_mutex); + std::scoped_lock lock(out_mutex, task_callback_mutex); if (state.cancellation_status == CancellationStatus::FULLY_CANCELLED) return std::nullopt; @@ -555,7 +553,7 @@ void TCPHandler::runImpl() { auto callback = [this]() { - std::scoped_lock lock(task_callback_mutex, fatal_error_mutex); + std::scoped_lock lock(out_mutex, task_callback_mutex); if (getQueryCancellationStatus() == CancellationStatus::FULLY_CANCELLED) return true; @@ -574,7 +572,7 @@ void TCPHandler::runImpl() finish_or_cancel(); - std::lock_guard lock(task_callback_mutex); + std::lock_guard lock(out_mutex); /// Send final progress after calling onFinish(), since it will update the progress. /// @@ -597,7 +595,7 @@ void TCPHandler::runImpl() break; { - std::lock_guard lock(task_callback_mutex); + std::lock_guard lock(out_mutex); sendLogs(); sendEndOfStream(); } @@ -886,13 +884,16 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - SquashingTransform squashing(0, query_context->getSettingsRef().async_insert_max_data_size); + Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); while (readDataNext()) { - auto result = squashing.add(std::move(state.block_for_insert)); - if (result) + squashing.header = state.block_for_insert; + auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); + if (planned_chunk.hasChunkInfo()) { + Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); + auto result = state.block_for_insert.cloneWithColumns(result_chunk.getColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -901,7 +902,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - auto result = squashing.add({}); + auto 
planned_chunk = squashing.flush(); + Chunk result_chunk; + if (planned_chunk.hasChunkInfo()) + result_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = squashing.header.cloneWithColumns(result_chunk.getColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } @@ -1008,7 +1014,7 @@ void TCPHandler::processOrdinaryQuery() if (query_context->getSettingsRef().allow_experimental_query_deduplication) { - std::lock_guard lock(task_callback_mutex); + std::lock_guard lock(out_mutex); sendPartUUIDs(); } @@ -1018,13 +1024,13 @@ void TCPHandler::processOrdinaryQuery() if (header) { - std::lock_guard lock(task_callback_mutex); + std::lock_guard lock(out_mutex); sendData(header); } } /// Defer locking to cover a part of the scope below and everything after it - std::unique_lock progress_lock(task_callback_mutex, std::defer_lock); + std::unique_lock out_lock(out_mutex, std::defer_lock); { PullingAsyncPipelineExecutor executor(pipeline); @@ -1050,6 +1056,9 @@ void TCPHandler::processOrdinaryQuery() executor.cancelReading(); } + lock.unlock(); + out_lock.lock(); + if (after_send_progress.elapsed() / 1000 >= interactive_delay) { /// Some time passed and there is a progress. 
@@ -1065,12 +1074,14 @@ void TCPHandler::processOrdinaryQuery() if (!state.io.null_format) sendData(block); } + + out_lock.unlock(); } /// This lock wasn't acquired before and we make .lock() call here /// so everything under this line is covered even together /// with sendProgress() out of the scope - progress_lock.lock(); + out_lock.lock(); /** If data has run out, we will send the profiling data and total values to * the last zero block to be able to use @@ -1479,7 +1490,7 @@ void TCPHandler::receiveHello() try { session->authenticate( - SSLCertificateCredentials{user, secure_socket.peerCertificate().commonName()}, + SSLCertificateCredentials{user, extractSSLCertificateSubjects(secure_socket.peerCertificate())}, getClientAddress(client_info)); return; } diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 191617f1905..74afb5a14a5 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -20,6 +19,7 @@ #include #include +#include "Core/Types.h" #include "IServer.h" #include "Interpreters/AsynchronousInsertQueue.h" #include "Server/TCPProtocolStackData.h" @@ -226,8 +226,13 @@ private: std::optional nonce; String cluster; + /// `out_mutex` protects `out` (WriteBuffer). + /// So it is used for method sendData(), sendProgress(), sendLogs(), etc. + std::mutex out_mutex; + /// `task_callback_mutex` protects tasks callbacks. + /// Inside these callbacks we might also change cancellation status, + /// so it also protects cancellation status checks. std::mutex task_callback_mutex; - std::mutex fatal_error_mutex; /// At the moment, only one ongoing query in the connection is supported at a time. 
QueryState state; diff --git a/src/Server/TLSHandler.cpp b/src/Server/TLSHandler.cpp new file mode 100644 index 00000000000..b0ed342c251 --- /dev/null +++ b/src/Server/TLSHandler.cpp @@ -0,0 +1,118 @@ +#include + +#include +#include + + +#if USE_SSL +# include +# include +# include +#endif + +#if !defined(USE_SSL) || USE_SSL == 0 +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; +} +#endif + +DB::TLSHandler::TLSHandler( + const StreamSocket & socket, + [[maybe_unused]] const LayeredConfiguration & config_, + [[maybe_unused]] const std::string & prefix_, + TCPProtocolStackData & stack_data_) + : Poco::Net::TCPServerConnection(socket) +#if USE_SSL + , config(config_) + , prefix(prefix_) +#endif + , stack_data(stack_data_) +{ +#if USE_SSL + params.privateKeyFile = config.getString(prefix + SSLManager::CFG_PRIV_KEY_FILE, ""); + params.certificateFile = config.getString(prefix + SSLManager::CFG_CERTIFICATE_FILE, params.privateKeyFile); + if (!params.privateKeyFile.empty() && !params.certificateFile.empty()) + { + // for backwards compatibility + auto ctx = SSLManager::instance().defaultServerContext(); + params.caLocation = config.getString(prefix + SSLManager::CFG_CA_LOCATION, ctx->getCAPaths().caLocation); + + // optional options for which we have defaults defined + params.verificationMode = SSLManager::VAL_VER_MODE; + if (config.hasProperty(prefix + SSLManager::CFG_VER_MODE)) + { + // either: none, relaxed, strict, once + std::string mode = config.getString(prefix + SSLManager::CFG_VER_MODE); + params.verificationMode = Poco::Net::Utility::convertVerificationMode(mode); + } + + params.verificationDepth = config.getInt(prefix + SSLManager::CFG_VER_DEPTH, SSLManager::VAL_VER_DEPTH); + params.loadDefaultCAs = config.getBool(prefix + SSLManager::CFG_ENABLE_DEFAULT_CA, SSLManager::VAL_ENABLE_DEFAULT_CA); + params.cipherList = config.getString(prefix + SSLManager::CFG_CIPHER_LIST, SSLManager::VAL_CIPHER_LIST); + params.cipherList = config.getString(prefix + 
SSLManager::CFG_CYPHER_LIST, params.cipherList); // for backwards compatibility + + bool require_tlsv1 = config.getBool(prefix + SSLManager::CFG_REQUIRE_TLSV1, false); + bool require_tlsv1_1 = config.getBool(prefix + SSLManager::CFG_REQUIRE_TLSV1_1, false); + bool require_tlsv1_2 = config.getBool(prefix + SSLManager::CFG_REQUIRE_TLSV1_2, false); + if (require_tlsv1_2) + usage = Context::TLSV1_2_SERVER_USE; + else if (require_tlsv1_1) + usage = Context::TLSV1_1_SERVER_USE; + else if (require_tlsv1) + usage = Context::TLSV1_SERVER_USE; + else + usage = Context::SERVER_USE; + + params.dhParamsFile = config.getString(prefix + SSLManager::CFG_DH_PARAMS_FILE, ""); + params.ecdhCurve = config.getString(prefix + SSLManager::CFG_ECDH_CURVE, ""); + + std::string disabled_protocols_list = config.getString(prefix + SSLManager::CFG_DISABLE_PROTOCOLS, ""); + Poco::StringTokenizer dp_tok(disabled_protocols_list, ";,", Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY); + disabled_protocols = 0; + for (const auto & token : dp_tok) + { + if (token == "sslv2") + disabled_protocols |= Context::PROTO_SSLV2; + else if (token == "sslv3") + disabled_protocols |= Context::PROTO_SSLV3; + else if (token == "tlsv1") + disabled_protocols |= Context::PROTO_TLSV1; + else if (token == "tlsv1_1") + disabled_protocols |= Context::PROTO_TLSV1_1; + else if (token == "tlsv1_2") + disabled_protocols |= Context::PROTO_TLSV1_2; + } + + extended_verification = config.getBool(prefix + SSLManager::CFG_EXTENDED_VERIFICATION, false); + prefer_server_ciphers = config.getBool(prefix + SSLManager::CFG_PREFER_SERVER_CIPHERS, false); + } +#endif +} + + +void DB::TLSHandler::run() +{ +#if USE_SSL + auto ctx = SSLManager::instance().defaultServerContext(); + if (!params.privateKeyFile.empty() && !params.certificateFile.empty()) + { + ctx = SSLManager::instance().getCustomServerContext(prefix); + if (!ctx) + { + ctx = new Context(usage, params); + ctx->disableProtocols(disabled_protocols); + 
ctx->enableExtendedCertificateVerification(extended_verification); + if (prefer_server_ciphers) + ctx->preferServerCiphers(); + CertificateReloader::instance().tryLoad(config, ctx->sslContext(), prefix); + ctx = SSLManager::instance().setCustomServerContext(prefix, ctx); + } + } + socket() = SecureStreamSocket::attach(socket(), ctx); + stack_data.socket = socket(); + stack_data.certificate = params.certificateFile; +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif +} diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index dd025e3e165..2bec7380b08 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -1,9 +1,10 @@ #pragma once #include -#include -#include #include +#include + +#include "config.h" #if USE_SSL # include @@ -14,11 +15,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int SUPPORT_IS_DISABLED; -} - class TLSHandler : public Poco::Net::TCPServerConnection { #if USE_SSL @@ -27,30 +23,22 @@ class TLSHandler : public Poco::Net::TCPServerConnection using Context = Poco::Net::Context; #endif using StreamSocket = Poco::Net::StreamSocket; + using LayeredConfiguration = Poco::Util::LayeredConfiguration; public: - explicit TLSHandler(const StreamSocket & socket, const std::string & key_, const std::string & certificate_, TCPProtocolStackData & stack_data_) - : Poco::Net::TCPServerConnection(socket) - , key(key_) - , certificate(certificate_) - , stack_data(stack_data_) - {} + explicit TLSHandler(const StreamSocket & socket, const LayeredConfiguration & config_, const std::string & prefix_, TCPProtocolStackData & stack_data_); + + void run() override; - void run() override - { -#if USE_SSL - auto ctx = SSLManager::instance().defaultServerContext(); - if (!key.empty() && !certificate.empty()) - ctx = new Context(Context::Usage::SERVER_USE, key, certificate, ctx->getCAPaths().caLocation); - socket() = 
SecureStreamSocket::attach(socket(), ctx); - stack_data.socket = socket(); - stack_data.certificate = certificate; -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - } private: - std::string key [[maybe_unused]]; - std::string certificate [[maybe_unused]]; +#if USE_SSL + Context::Params params [[maybe_unused]]; + Context::Usage usage [[maybe_unused]]; + int disabled_protocols = 0; + bool extended_verification = false; + bool prefer_server_ciphers = false; + const LayeredConfiguration & config [[maybe_unused]]; + std::string prefix [[maybe_unused]]; +#endif TCPProtocolStackData & stack_data [[maybe_unused]]; }; diff --git a/src/Server/TLSHandlerFactory.h b/src/Server/TLSHandlerFactory.h index 19602c7d25e..e8f3a1b7853 100644 --- a/src/Server/TLSHandlerFactory.h +++ b/src/Server/TLSHandlerFactory.h @@ -48,8 +48,8 @@ public: LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); return new TLSHandler( socket, - server.config().getString(conf_name + ".privateKeyFile", ""), - server.config().getString(conf_name + ".certificateFile", ""), + server.config(), + conf_name + ".", stack_data); } catch (const Poco::Net::NetException &) diff --git a/src/Server/grpc_protos/clickhouse_grpc.proto b/src/Server/grpc_protos/clickhouse_grpc.proto index c9ba6f28506..dc17570f833 100644 --- a/src/Server/grpc_protos/clickhouse_grpc.proto +++ b/src/Server/grpc_protos/clickhouse_grpc.proto @@ -90,6 +90,7 @@ message QueryInfo { string user_name = 9; string password = 10; string quota = 11; + string jwt = 25; // Works exactly like sessions in the HTTP protocol. 
string session_id = 12; diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 4879d1a16dc..35a5e95e643 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -25,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -44,7 +45,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_STATISTIC; + extern const int ILLEGAL_STATISTICS; extern const int BAD_ARGUMENTS; extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int LOGICAL_ERROR; @@ -263,17 +264,32 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ return command; } - else if (command_ast->type == ASTAlterCommand::ADD_STATISTIC) + else if (command_ast->type == ASTAlterCommand::ADD_STATISTICS) { AlterCommand command; command.ast = command_ast->clone(); - command.statistic_decl = command_ast->statistic_decl->clone(); - command.type = AlterCommand::ADD_STATISTIC; + command.statistics_decl = command_ast->statistics_decl->clone(); + command.type = AlterCommand::ADD_STATISTICS; - const auto & ast_stat_decl = command_ast->statistic_decl->as(); + const auto & ast_stat_decl = command_ast->statistics_decl->as(); - command.statistic_columns = ast_stat_decl.getColumnNames(); - command.statistic_type = ast_stat_decl.type; + command.statistics_columns = ast_stat_decl.getColumnNames(); + command.statistics_types = ast_stat_decl.getTypeNames(); + command.if_not_exists = command_ast->if_not_exists; + + return command; + } + else if (command_ast->type == ASTAlterCommand::MODIFY_STATISTICS) + { + AlterCommand command; + command.ast = command_ast->clone(); + command.statistics_decl = command_ast->statistics_decl->clone(); + command.type = AlterCommand::MODIFY_STATISTICS; + + const auto & ast_stat_decl = command_ast->statistics_decl->as(); + + command.statistics_columns = 
ast_stat_decl.getColumnNames(); + command.statistics_types = ast_stat_decl.getTypeNames(); command.if_not_exists = command_ast->if_not_exists; return command; @@ -337,17 +353,17 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ return command; } - else if (command_ast->type == ASTAlterCommand::DROP_STATISTIC) + else if (command_ast->type == ASTAlterCommand::DROP_STATISTICS) { AlterCommand command; command.ast = command_ast->clone(); - command.type = AlterCommand::DROP_STATISTIC; - const auto & ast_stat_decl = command_ast->statistic_decl->as(); + command.statistics_decl = command_ast->statistics_decl->clone(); + command.type = AlterCommand::DROP_STATISTICS; + const auto & ast_stat_decl = command_ast->statistics_decl->as(); - command.statistic_columns = ast_stat_decl.getColumnNames(); - command.statistic_type = ast_stat_decl.type; + command.statistics_columns = ast_stat_decl.getColumnNames(); command.if_exists = command_ast->if_exists; - command.clear = command_ast->clear_statistic; + command.clear = command_ast->clear_statistics; if (command_ast->partition) command.partition = command_ast->partition->clone(); @@ -676,41 +692,56 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) metadata.secondary_indices.erase(erase_it); } } - else if (type == ADD_STATISTIC) + else if (type == ADD_STATISTICS) { - for (const auto & statistic_column_name : statistic_columns) + for (const auto & statistics_column_name : statistics_columns) { - if (!metadata.columns.has(statistic_column_name)) + if (!metadata.columns.has(statistics_column_name)) { - throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Cannot add statistic {} with type {}: this column is not found", statistic_column_name, statistic_type); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot add statistics for column {}: this column is not found", statistics_column_name); } - if (!if_exists && metadata.columns.get(statistic_column_name).stat) - throw 
Exception(ErrorCodes::ILLEGAL_STATISTIC, "Cannot add statistic {} with type {}: statistic on this column with this type already exists", statistic_column_name, statistic_type); } - auto stats = StatisticDescription::getStatisticsFromAST(statistic_decl, metadata.columns); - for (auto && stat : stats) + auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns); + for (const auto & stats : stats_vec) { - metadata.columns.modify(stat.column_name, - [&](ColumnDescription & column) { column.stat = std::move(stat); }); + metadata.columns.modify(stats.column_name, + [&](ColumnDescription & column) { column.statistics.merge(stats, column.name, column.type, if_not_exists); }); } } - else if (type == DROP_STATISTIC) + else if (type == DROP_STATISTICS) { - for (const auto & stat_column_name : statistic_columns) + for (const auto & statistics_column_name : statistics_columns) { - if (!metadata.columns.has(stat_column_name) || !metadata.columns.get(stat_column_name).stat) + if (!metadata.columns.has(statistics_column_name) + || metadata.columns.get(statistics_column_name).statistics.empty()) { if (if_exists) return; - throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "Wrong statistic name. Cannot find statistic {} with type {} to drop", backQuote(stat_column_name), statistic_type); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Wrong statistics name. 
Cannot find statistics {} to drop", backQuote(statistics_column_name)); } - if (!partition && !clear) + + if (!clear && !partition) + metadata.columns.modify(statistics_column_name, + [&](ColumnDescription & column) { column.statistics.clear(); }); + } + } + else if (type == MODIFY_STATISTICS) + { + for (const auto & statistics_column_name : statistics_columns) + { + if (!metadata.columns.has(statistics_column_name)) { - metadata.columns.modify(stat_column_name, - [&](ColumnDescription & column) { column.stat = std::nullopt; }); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot add statistics for column {}: this column is not found", statistics_column_name); } } + + auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns); + for (const auto & stats : stats_vec) + { + metadata.columns.modify(stats.column_name, + [&](ColumnDescription & column) { column.statistics.assign(stats); }); + } } else if (type == ADD_CONSTRAINT) { @@ -833,8 +864,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) rename_visitor.visit(column_to_modify.default_desc.expression); if (column_to_modify.ttl) rename_visitor.visit(column_to_modify.ttl); - if (column_to_modify.name == column_name && column_to_modify.stat) - column_to_modify.stat->column_name = rename_to; + if (column_to_modify.name == column_name && !column_to_modify.statistics.empty()) + column_to_modify.statistics.column_name = rename_to; }); } if (metadata.table_ttl.definition_ast) @@ -958,7 +989,7 @@ bool AlterCommand::isRequireMutationStage(const StorageInMemoryMetadata & metada if (isRemovingProperty() || type == REMOVE_TTL || type == REMOVE_SAMPLE_BY) return false; - if (type == DROP_INDEX || type == DROP_PROJECTION || type == RENAME_COLUMN || type == DROP_STATISTIC) + if (type == DROP_INDEX || type == DROP_PROJECTION || type == RENAME_COLUMN || type == DROP_STATISTICS) return true; /// Drop alias is metadata alter, in other case mutation is required. 
@@ -1026,7 +1057,7 @@ bool AlterCommand::isRemovingProperty() const bool AlterCommand::isDropSomething() const { - return type == Type::DROP_COLUMN || type == Type::DROP_INDEX + return type == Type::DROP_COLUMN || type == Type::DROP_INDEX || type == Type::DROP_STATISTICS || type == Type::DROP_CONSTRAINT || type == Type::DROP_PROJECTION; } @@ -1065,10 +1096,10 @@ std::optional AlterCommand::tryConvertToMutationCommand(Storage result.predicate = nullptr; } - else if (type == DROP_STATISTIC) + else if (type == DROP_STATISTICS) { - result.type = MutationCommand::Type::DROP_STATISTIC; - result.statistic_columns = statistic_columns; + result.type = MutationCommand::Type::DROP_STATISTICS; + result.statistics_columns = statistics_columns; if (clear) result.clear = true; @@ -1583,7 +1614,10 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const } } - if (all_columns.empty()) + /// Parameterized views do not have 'columns' in their metadata + bool is_parameterized_view = table->as() && table->as()->isParameterizedView(); + + if (!is_parameterized_view && all_columns.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot DROP or CLEAR all columns"); validateColumnsDefaultsAndGetSampleBlock(default_expr_list, all_columns.getAll(), context); diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 46abffab8ad..a91bac10214 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -38,8 +38,9 @@ struct AlterCommand DROP_CONSTRAINT, ADD_PROJECTION, DROP_PROJECTION, - ADD_STATISTIC, - DROP_STATISTIC, + ADD_STATISTICS, + DROP_STATISTICS, + MODIFY_STATISTICS, MODIFY_TTL, MODIFY_SETTING, RESET_SETTING, @@ -123,9 +124,9 @@ struct AlterCommand /// For ADD/DROP PROJECTION String projection_name; - ASTPtr statistic_decl = nullptr; - std::vector statistic_columns; - String statistic_type; + ASTPtr statistics_decl = nullptr; + std::vector statistics_columns; + std::vector statistics_types; /// For MODIFY TTL 
ASTPtr ttl = nullptr; diff --git a/src/Storages/ColumnDependency.h b/src/Storages/ColumnDependency.h index b9088dd0227..dcbda7a4b86 100644 --- a/src/Storages/ColumnDependency.h +++ b/src/Storages/ColumnDependency.h @@ -26,8 +26,8 @@ struct ColumnDependency /// TTL is set for @column_name. TTL_TARGET, - /// Exists any statistic, that requires @column_name - STATISTIC, + /// Exists any statistics, that requires @column_name + STATISTICS, }; ColumnDependency(const String & column_name_, Kind kind_) diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index a8869970300..c07583cd39d 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -24,7 +25,6 @@ #include #include #include -#include "Parsers/ASTSetQuery.h" #include #include #include @@ -72,7 +72,7 @@ ColumnDescription & ColumnDescription::operator=(const ColumnDescription & other codec = other.codec ? other.codec->clone() : nullptr; settings = other.settings; ttl = other.ttl ? other.ttl->clone() : nullptr; - stat = other.stat; + statistics = other.statistics; return *this; } @@ -95,7 +95,7 @@ ColumnDescription & ColumnDescription::operator=(ColumnDescription && other) noe ttl = other.ttl ? 
other.ttl->clone() : nullptr; other.ttl.reset(); - stat = std::move(other.stat); + statistics = std::move(other.statistics); return *this; } @@ -107,7 +107,7 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const return name == other.name && type->equals(*other.type) && default_desc == other.default_desc - && stat == other.stat + && statistics == other.statistics && ast_to_str(codec) == ast_to_str(other.codec) && settings == other.settings && ast_to_str(ttl) == ast_to_str(other.ttl); @@ -154,10 +154,10 @@ void ColumnDescription::writeText(WriteBuffer & buf) const DB::writeText(")", buf); } - if (stat) + if (!statistics.empty()) { writeChar('\t', buf); - writeEscapedString(queryToString(stat->ast), buf); + writeEscapedString(queryToString(statistics.getAST()), buf); } if (ttl) @@ -207,6 +207,13 @@ void ColumnDescription::readText(ReadBuffer & buf) if (col_ast->settings) settings = col_ast->settings->as().changes; + + if (col_ast->statistics_desc) + { + statistics = ColumnStatisticsDescription::fromColumnDeclaration(*col_ast, type); + /// every column has name `x` here, so we have to set the name manually. 
+ statistics.column_name = name; + } } else throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse column description"); diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 79e43d0a4e4..f0760160f0a 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -89,7 +89,7 @@ struct ColumnDescription ASTPtr codec; SettingsChanges settings; ASTPtr ttl; - std::optional stat; + ColumnStatisticsDescription statistics; ColumnDescription() = default; ColumnDescription(const ColumnDescription & other) { *this = other; } diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h index d4e737a7de1..c703c9ce999 100644 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ b/src/Storages/ExternalDataSourceConfiguration.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 9afafe9f52b..1f7ac23eb82 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include @@ -236,7 +236,7 @@ StorageID IStorage::getStorageID() const return storage_id; } -ConditionEstimator IStorage::getConditionEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const +ConditionSelectivityEstimator IStorage::getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const { return {}; } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 9d6b3457a24..98afd844046 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -68,7 +68,7 @@ using DatabaseAndTableName = std::pair; class BackupEntriesCollector; class RestorerFromBackup; -class ConditionEstimator; +class ConditionSelectivityEstimator; struct ColumnSize { @@ -135,7 +135,7 @@ public: /// Returns true if the storage supports queries with the PREWHERE 
section. virtual bool supportsPrewhere() const { return false; } - virtual ConditionEstimator getConditionEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const; + virtual ConditionSelectivityEstimator getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const; /// Returns which columns supports PREWHERE, or empty std::nullopt if all columns is supported. /// This is needed for engines whose aggregates data from multiple tables, like Merge. diff --git a/src/Storages/Kafka/KafkaConsumer.cpp b/src/Storages/Kafka/KafkaConsumer.cpp index 7075dcb71ca..9ba42b9875e 100644 --- a/src/Storages/Kafka/KafkaConsumer.cpp +++ b/src/Storages/Kafka/KafkaConsumer.cpp @@ -1,7 +1,4 @@ -// Needs to go first because its partial specialization of fmt::formatter -// should be defined before any instantiation -#include - +#include #include #include diff --git a/src/Storages/Kafka/KafkaConsumer.h b/src/Storages/Kafka/KafkaConsumer.h index a3bc97779b3..4daf8652c3b 100644 --- a/src/Storages/Kafka/KafkaConsumer.h +++ b/src/Storages/Kafka/KafkaConsumer.h @@ -1,12 +1,14 @@ #pragma once #include +#include #include #include #include #include +#include #include namespace CurrentMetrics @@ -197,3 +199,6 @@ private: }; } + +template <> struct fmt::formatter : fmt::ostream_formatter {}; +template <> struct fmt::formatter : fmt::ostream_formatter {}; diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index c3aacfd67d3..57a1ea302f9 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -21,7 +21,7 @@ limitations under the License. */ #include #include #include -#include +#include #include #include #include @@ -626,7 +626,7 @@ QueryPipelineBuilder StorageLiveView::completeQuery(Pipes pipes) /// and two-level aggregation is triggered). 
builder.addSimpleTransform([&](const Block & cur_header) { - return std::make_shared( + return std::make_shared( cur_header, getContext()->getSettingsRef().min_insert_block_size_rows, getContext()->getSettingsRef().min_insert_block_size_bytes); diff --git a/src/Storages/MergeTree/ColumnSizeEstimator.h b/src/Storages/MergeTree/ColumnSizeEstimator.h index 1307a5f493e..59a635a00fb 100644 --- a/src/Storages/MergeTree/ColumnSizeEstimator.h +++ b/src/Storages/MergeTree/ColumnSizeEstimator.h @@ -19,18 +19,18 @@ public: size_t sum_index_columns = 0; size_t sum_ordinary_columns = 0; - ColumnSizeEstimator(ColumnToSize && map_, const Names & key_columns, const Names & ordinary_columns) + ColumnSizeEstimator(ColumnToSize && map_, const NamesAndTypesList & key_columns, const NamesAndTypesList & ordinary_columns) : map(std::move(map_)) { - for (const auto & name : key_columns) + for (const auto & [name, _] : key_columns) if (!map.contains(name)) map[name] = 0; - for (const auto & name : ordinary_columns) + for (const auto & [name, _] : ordinary_columns) if (!map.contains(name)) map[name] = 0; - for (const auto & name : key_columns) + for (const auto & [name, _] : key_columns) sum_index_columns += map.at(name); - for (const auto & name : ordinary_columns) + for (const auto & [name, _] : ordinary_columns) sum_ordinary_columns += map.at(name); sum_total = std::max(static_cast(1), sum_index_columns + sum_ordinary_columns); diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 5faa8d4b48b..378a1944396 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -229,7 +229,7 @@ bool DataPartStorageOnDiskBase::isBroken() const bool DataPartStorageOnDiskBase::isReadonly() const { - return volume->getDisk()->isReadOnly(); + return volume->getDisk()->isReadOnly() || volume->getDisk()->isWriteOnce(); } void 
DataPartStorageOnDiskBase::syncRevision(UInt64 revision) const diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index c276361559c..bdea46a8210 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -375,6 +375,12 @@ void IMergeTreeDataPart::unloadIndex() index_loaded = false; } +bool IMergeTreeDataPart::isIndexLoaded() const +{ + std::scoped_lock lock(index_mutex); + return index_loaded; +} + void IMergeTreeDataPart::setName(const String & new_name) { mutable_name = new_name; @@ -673,16 +679,16 @@ String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(bool with_subc return *minimum_size_column; } -Statistics IMergeTreeDataPart::loadStatistics() const +ColumnsStatistics IMergeTreeDataPart::loadStatistics() const { const auto & metadata_snaphost = storage.getInMemoryMetadata(); auto total_statistics = MergeTreeStatisticsFactory::instance().getMany(metadata_snaphost.getColumns()); - Statistics result; + ColumnsStatistics result; for (auto & stat : total_statistics) { - String file_name = stat->getFileName() + STAT_FILE_SUFFIX; + String file_name = stat->getFileName() + STATS_FILE_SUFFIX; String file_path = fs::path(getDataPartStorage().getRelativePath()) / file_name; if (!metadata_manager->exists(file_name)) @@ -737,10 +743,10 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks { /// Don't scare people with broken part error if (!isRetryableException(std::current_exception())) - LOG_ERROR(storage.log, "Part {} is broken and need manual correction", getDataPartStorage().getFullPath()); + LOG_ERROR(storage.log, "Part {} is broken and needs manual correction", getDataPartStorage().getFullPath()); // There could be conditions that data part to be loaded is broken, but some of meta infos are already written - // into meta data before exception, need to clean them all. 
+ // into metadata before exception, need to clean them all. metadata_manager->deleteAll(/*include_projection*/ true); metadata_manager->assertAllDeleted(/*include_projection*/ true); throw; @@ -1577,7 +1583,7 @@ void IMergeTreeDataPart::loadColumns(bool require) if (getFileNameForColumn(column)) loaded_columns.push_back(column); - if (columns.empty()) + if (loaded_columns.empty()) throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns in part {}", name); if (!is_readonly_storage) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index bd3814bf415..571f1389e10 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -171,7 +171,7 @@ public: void remove(); - Statistics loadStatistics() const; + ColumnsStatistics loadStatistics() const; /// Initialize columns (from columns.txt if exists, or create from column files if not). /// Load various metadata into memory: checksums from checksums.txt, index if required, etc. 
@@ -369,6 +369,7 @@ public: void setIndex(const Columns & cols_); void setIndex(Columns && cols_); void unloadIndex(); + bool isIndexLoaded() const; /// For data in RAM ('index') UInt64 getIndexSizeInBytes() const; diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 891ba1b9660..6152da78395 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -119,7 +119,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, + const ColumnsStatistics & stats_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, @@ -136,7 +136,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, + const ColumnsStatistics & stats_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, @@ -156,7 +156,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, + const ColumnsStatistics & stats_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index f04beb37ebb..d9e9a433827 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ 
b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -84,7 +84,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, + const ColumnsStatistics & stats_to_recalc_, const String & marks_file_extension, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index f8cf19120c7..7e4b1db4c89 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -689,6 +690,11 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown( return *res; } +const std::unordered_map KeyCondition::space_filling_curve_name_to_type { + {"mortonEncode", SpaceFillingCurveType::Morton}, + {"hilbertEncode", SpaceFillingCurveType::Hilbert} +}; + ActionsDAGPtr KeyCondition::cloneASTWithInversionPushDown(ActionsDAG::NodeRawConstPtrs nodes, const ContextPtr & context) { auto res = std::make_shared(); @@ -744,16 +750,17 @@ static NameSet getAllSubexpressionNames(const ExpressionActions & key_expr) void KeyCondition::getAllSpaceFillingCurves() { - /// So far the only supported function is mortonEncode (Morton curve). + /// So far the only supported functions are mortonEncode and hilbertEncode (Morton and Hilbert curves). 
for (const auto & action : key_expr->getActions()) { if (action.node->type == ActionsDAG::ActionType::FUNCTION && action.node->children.size() >= 2 - && action.node->function_base->getName() == "mortonEncode") + && space_filling_curve_name_to_type.contains(action.node->function_base->getName())) { SpaceFillingCurveDescription curve; curve.function_name = action.node->function_base->getName(); + curve.type = space_filling_curve_name_to_type.at(curve.function_name); curve.key_column_pos = key_columns.at(action.node->result_name); for (const auto & child : action.node->children) { @@ -2665,6 +2672,15 @@ BoolMask KeyCondition::checkInHyperrectangle( const DataTypes & data_types) const { std::vector rpn_stack; + + auto curve_type = [&](size_t key_column_pos) + { + for (const auto & curve : key_space_filling_curves) + if (curve.key_column_pos == key_column_pos) + return curve.type; + return SpaceFillingCurveType::Unknown; + }; + for (const auto & element : rpn) { if (element.argument_num_of_space_filling_curve.has_value()) @@ -2764,26 +2780,43 @@ BoolMask KeyCondition::checkInHyperrectangle( UInt64 right = key_range.right.get(); BoolMask mask(false, true); - mortonIntervalToHyperrectangles<2>(left, right, - [&](std::array, 2> morton_hyperrectangle) + auto hyperrectangle_intersection_callback = [&](std::array, 2> curve_hyperrectangle) + { + BoolMask current_intersection(true, false); + for (size_t dim = 0; dim < num_dimensions; ++dim) { - BoolMask current_intersection(true, false); - for (size_t dim = 0; dim < num_dimensions; ++dim) - { - const Range & condition_arg_range = element.space_filling_curve_args_hyperrectangle[dim]; + const Range & condition_arg_range = element.space_filling_curve_args_hyperrectangle[dim]; - const Range morton_arg_range( - morton_hyperrectangle[dim].first, true, - morton_hyperrectangle[dim].second, true); + const Range curve_arg_range( + curve_hyperrectangle[dim].first, true, + curve_hyperrectangle[dim].second, true); - bool intersects = 
condition_arg_range.intersectsRange(morton_arg_range); - bool contains = condition_arg_range.containsRange(morton_arg_range); + bool intersects = condition_arg_range.intersectsRange(curve_arg_range); + bool contains = condition_arg_range.containsRange(curve_arg_range); - current_intersection = current_intersection & BoolMask(intersects, !contains); - } + current_intersection = current_intersection & BoolMask(intersects, !contains); + } - mask = mask | current_intersection; - }); + mask = mask | current_intersection; + }; + + switch (curve_type(element.key_column)) + { + case SpaceFillingCurveType::Hilbert: + { + hilbertIntervalToHyperrectangles2D(left, right, hyperrectangle_intersection_callback); + break; + } + case SpaceFillingCurveType::Morton: + { + mortonIntervalToHyperrectangles<2>(left, right, hyperrectangle_intersection_callback); + break; + } + case SpaceFillingCurveType::Unknown: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "curve_type is `Unknown`. It is a bug."); + } + } rpn_stack.emplace_back(mask); } diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 2bc3b108e02..6e5956706aa 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -328,11 +328,20 @@ private: const NameSet key_subexpr_names; /// Space-filling curves in the key + enum class SpaceFillingCurveType + { + Unknown = 0, + Morton, + Hilbert + }; + static const std::unordered_map space_filling_curve_name_to_type; + struct SpaceFillingCurveDescription { size_t key_column_pos; String function_name; std::vector arguments; + SpaceFillingCurveType type; }; using SpaceFillingCurveDescriptions = std::vector; SpaceFillingCurveDescriptions key_space_filling_curves; diff --git a/src/Storages/MergeTree/MarkRange.h b/src/Storages/MergeTree/MarkRange.h index 626d4e9e689..6b111f348bb 100644 --- a/src/Storages/MergeTree/MarkRange.h +++ b/src/Storages/MergeTree/MarkRange.h @@ -69,7 +69,7 @@ struct fmt::formatter } 
template - auto format(const DB::MarkRange & range, FormatContext & ctx) + auto format(const DB::MarkRange & range, FormatContext & ctx) const { return fmt::format_to(ctx.out(), "{}", fmt::format("({}, {})", range.begin, range.end)); } diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 2db0c0af3d7..79efb0ca8b3 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -310,7 +310,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() auto table_id = storage.getStorageID(); task_context = Context::createCopy(storage.getContext()); - task_context->makeQueryContext(); + task_context->makeQueryContextForMerge(*storage.getSettings()); task_context->setCurrentQueryId(getQueryId()); task_context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MERGE); diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index a7070c80df9..be44177847c 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -165,7 +165,7 @@ void MergePlainMergeTreeTask::finish() ContextMutablePtr MergePlainMergeTreeTask::createTaskContext() const { auto context = Context::createCopy(storage.getContext()); - context->makeQueryContext(); + context->makeQueryContextForMerge(*storage.getSettings()); auto queryId = getQueryId(); context->setCurrentQueryId(queryId); context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MERGE); diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index f1f856da3a2..c8f1a08128b 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -48,59 +48,23 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; } - -/// PK columns are sorted and merged, ordinary columns are 
gathered using info from merge step -static void extractMergingAndGatheringColumns( - const NamesAndTypesList & storage_columns, - const ExpressionActionsPtr & sorting_key_expr, - const IndicesDescription & indexes, - const MergeTreeData::MergingParams & merging_params, - NamesAndTypesList & gathering_columns, Names & gathering_column_names, - NamesAndTypesList & merging_columns, Names & merging_column_names) +static ColumnsStatistics getStatisticsForColumns( + const NamesAndTypesList & columns_to_read, + const StorageMetadataPtr & metadata_snapshot) { - Names sort_key_columns_vec = sorting_key_expr->getRequiredColumns(); - std::set key_columns(sort_key_columns_vec.cbegin(), sort_key_columns_vec.cend()); - for (const auto & index : indexes) + ColumnsStatistics all_statistics; + const auto & all_columns = metadata_snapshot->getColumns(); + + for (const auto & column : columns_to_read) { - Names index_columns_vec = index.expression->getRequiredColumns(); - std::copy(index_columns_vec.cbegin(), index_columns_vec.cend(), - std::inserter(key_columns, key_columns.end())); - } - - /// Force sign column for Collapsing mode - if (merging_params.mode == MergeTreeData::MergingParams::Collapsing) - key_columns.emplace(merging_params.sign_column); - - /// Force version column for Replacing mode - if (merging_params.mode == MergeTreeData::MergingParams::Replacing) - { - key_columns.emplace(merging_params.is_deleted_column); - key_columns.emplace(merging_params.version_column); - } - - /// Force sign column for VersionedCollapsing mode. Version is already in primary key. 
- if (merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing) - key_columns.emplace(merging_params.sign_column); - - /// Force to merge at least one column in case of empty key - if (key_columns.empty()) - key_columns.emplace(storage_columns.front().name); - - /// TODO: also force "summing" and "aggregating" columns to make Horizontal merge only for such columns - - for (const auto & column : storage_columns) - { - if (key_columns.contains(column.name)) + const auto * desc = all_columns.tryGet(column.name); + if (desc && !desc->statistics.empty()) { - merging_columns.emplace_back(column); - merging_column_names.emplace_back(column.name); - } - else - { - gathering_columns.emplace_back(column); - gathering_column_names.emplace_back(column.name); + auto statistics = MergeTreeStatisticsFactory::instance().get(desc->statistics); + all_statistics.push_back(std::move(statistics)); } } + return all_statistics; } static void addMissedColumnsToSerializationInfos( @@ -129,6 +93,77 @@ static void addMissedColumnsToSerializationInfos( } } +/// PK columns are sorted and merged, ordinary columns are gathered using info from merge step +void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColumns() const +{ + const auto & sorting_key_expr = global_ctx->metadata_snapshot->getSortingKey().expression; + Names sort_key_columns_vec = sorting_key_expr->getRequiredColumns(); + + std::set key_columns(sort_key_columns_vec.cbegin(), sort_key_columns_vec.cend()); + + /// Force sign column for Collapsing mode + if (ctx->merging_params.mode == MergeTreeData::MergingParams::Collapsing) + key_columns.emplace(ctx->merging_params.sign_column); + + /// Force version column for Replacing mode + if (ctx->merging_params.mode == MergeTreeData::MergingParams::Replacing) + { + key_columns.emplace(ctx->merging_params.is_deleted_column); + key_columns.emplace(ctx->merging_params.version_column); + } + + /// Force sign column for VersionedCollapsing mode. 
Version is already in primary key. + if (ctx->merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing) + key_columns.emplace(ctx->merging_params.sign_column); + + /// Force to merge at least one column in case of empty key + if (key_columns.empty()) + key_columns.emplace(global_ctx->storage_columns.front().name); + + const auto & skip_indexes = global_ctx->metadata_snapshot->getSecondaryIndices(); + + for (const auto & index : skip_indexes) + { + auto index_columns = index.expression->getRequiredColumns(); + + /// Calculate indexes that depend only on one column on vertical + /// stage and other indexes on horizontal stage of merge. + if (index_columns.size() == 1) + { + const auto & column_name = index_columns.front(); + global_ctx->skip_indexes_by_column[column_name].push_back(index); + } + else + { + std::ranges::copy(index_columns, std::inserter(key_columns, key_columns.end())); + global_ctx->merging_skip_indexes.push_back(index); + } + } + + /// TODO: also force "summing" and "aggregating" columns to make Horizontal merge only for such columns + + for (const auto & column : global_ctx->storage_columns) + { + if (key_columns.contains(column.name)) + { + global_ctx->merging_columns.emplace_back(column); + + /// If column is in horizontal stage we need to calculate its indexes on horizontal stage as well + auto it = global_ctx->skip_indexes_by_column.find(column.name); + if (it != global_ctx->skip_indexes_by_column.end()) + { + for (auto & index : it->second) + global_ctx->merging_skip_indexes.push_back(std::move(index)); + + global_ctx->skip_indexes_by_column.erase(it); + } + } + else + { + global_ctx->gathering_columns.emplace_back(column); + } + } +} bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() { @@ -196,27 +231,18 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() if (!global_ctx->parent_part) global_ctx->temporary_directory_lock = global_ctx->data->getTemporaryPartDirectoryHolder(local_tmp_part_basename); - 
global_ctx->all_column_names = global_ctx->metadata_snapshot->getColumns().getNamesOfPhysical(); global_ctx->storage_columns = global_ctx->metadata_snapshot->getColumns().getAllPhysical(); auto object_columns = MergeTreeData::getConcreteObjectColumns(global_ctx->future_part->parts, global_ctx->metadata_snapshot->getColumns()); - extendObjectColumns(global_ctx->storage_columns, object_columns, false); global_ctx->storage_snapshot = std::make_shared(*global_ctx->data, global_ctx->metadata_snapshot, std::move(object_columns)); - extractMergingAndGatheringColumns( - global_ctx->storage_columns, - global_ctx->metadata_snapshot->getSortingKey().expression, - global_ctx->metadata_snapshot->getSecondaryIndices(), - ctx->merging_params, - global_ctx->gathering_columns, - global_ctx->gathering_column_names, - global_ctx->merging_columns, - global_ctx->merging_column_names); + extractMergingAndGatheringColumns(); global_ctx->new_data_part->uuid = global_ctx->future_part->uuid; global_ctx->new_data_part->partition.assign(global_ctx->future_part->getPartition()); global_ctx->new_data_part->is_temp = global_ctx->parent_part == nullptr; + /// In case of replicated merge tree with zero copy replication /// Here Clickhouse claims that this new part can be deleted in temporary state without unlocking the blobs /// The blobs have to be removed along with the part, this temporary part owns them and does not share them yet. 
@@ -278,6 +304,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() ctx->sum_input_rows_upper_bound = global_ctx->merge_list_element_ptr->total_rows_count; ctx->sum_compressed_bytes_upper_bound = global_ctx->merge_list_element_ptr->total_size_bytes_compressed; + global_ctx->chosen_merge_algorithm = chooseMergeAlgorithm(); global_ctx->merge_list_element_ptr->merge_algorithm.store(global_ctx->chosen_merge_algorithm, std::memory_order_relaxed); @@ -298,9 +325,9 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() case MergeAlgorithm::Horizontal: { global_ctx->merging_columns = global_ctx->storage_columns; - global_ctx->merging_column_names = global_ctx->all_column_names; + global_ctx->merging_skip_indexes = global_ctx->metadata_snapshot->getSecondaryIndices(); global_ctx->gathering_columns.clear(); - global_ctx->gathering_column_names.clear(); + global_ctx->skip_indexes_by_column.clear(); break; } case MergeAlgorithm::Vertical: @@ -309,13 +336,13 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() ctx->rows_sources_write_buf = std::make_unique(*ctx->rows_sources_uncompressed_write_buf); std::map local_merged_column_to_size; - for (const MergeTreeData::DataPartPtr & part : global_ctx->future_part->parts) + for (const auto & part : global_ctx->future_part->parts) part->accumulateColumnSizes(local_merged_column_to_size); ctx->column_sizes = ColumnSizeEstimator( std::move(local_merged_column_to_size), - global_ctx->merging_column_names, - global_ctx->gathering_column_names); + global_ctx->merging_columns, + global_ctx->gathering_columns); break; } @@ -323,9 +350,6 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() throw Exception(ErrorCodes::LOGICAL_ERROR, "Merge algorithm must be chosen"); } - assert(global_ctx->gathering_columns.size() == global_ctx->gathering_column_names.size()); - assert(global_ctx->merging_columns.size() == global_ctx->merging_column_names.size()); - /// If merge is vertical we cannot calculate it 
ctx->blocks_are_granules_size = (global_ctx->chosen_merge_algorithm == MergeAlgorithm::Vertical); @@ -342,28 +366,25 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() /// resources for this). if (!ctx->need_remove_expired_values) { - size_t expired_columns = 0; auto part_serialization_infos = global_ctx->new_data_part->getSerializationInfos(); + NameSet columns_to_remove; for (auto & [column_name, ttl] : global_ctx->new_data_part->ttl_infos.columns_ttl) { if (ttl.finished()) { global_ctx->new_data_part->expired_columns.insert(column_name); LOG_TRACE(ctx->log, "Adding expired column {} for part {}", column_name, global_ctx->new_data_part->name); - std::erase(global_ctx->gathering_column_names, column_name); - std::erase(global_ctx->merging_column_names, column_name); - std::erase(global_ctx->all_column_names, column_name); + columns_to_remove.insert(column_name); part_serialization_infos.erase(column_name); - ++expired_columns; } } - if (expired_columns) + if (!columns_to_remove.empty()) { - global_ctx->gathering_columns = global_ctx->gathering_columns.filter(global_ctx->gathering_column_names); - global_ctx->merging_columns = global_ctx->merging_columns.filter(global_ctx->merging_column_names); - global_ctx->storage_columns = global_ctx->storage_columns.filter(global_ctx->all_column_names); + global_ctx->gathering_columns = global_ctx->gathering_columns.eraseNames(columns_to_remove); + global_ctx->merging_columns = global_ctx->merging_columns.eraseNames(columns_to_remove); + global_ctx->storage_columns = global_ctx->storage_columns.eraseNames(columns_to_remove); global_ctx->new_data_part->setColumns( global_ctx->storage_columns, @@ -376,8 +397,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->new_data_part, global_ctx->metadata_snapshot, global_ctx->merging_columns, - MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()), - 
MergeTreeStatisticsFactory::instance().getMany(global_ctx->metadata_snapshot->getColumns()), + MergeTreeIndexFactory::instance().getMany(global_ctx->merging_skip_indexes), + getStatisticsForColumns(global_ctx->merging_columns, global_ctx->metadata_snapshot), ctx->compression_codec, global_ctx->txn ? global_ctx->txn->tid : Tx::PrehistoricTID, /*reset_columns=*/ true, @@ -407,9 +428,7 @@ void MergeTask::addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const Str return; global_ctx->storage_columns.emplace_back(name, type); - global_ctx->all_column_names.emplace_back(name); global_ctx->gathering_columns.emplace_back(name, type); - global_ctx->gathering_column_names.emplace_back(name); } @@ -423,7 +442,6 @@ MergeTask::StageRuntimeContextPtr MergeTask::ExecuteAndFinalizeHorizontalPart::g new_ctx->compression_codec = std::move(ctx->compression_codec); new_ctx->tmp_disk = std::move(ctx->tmp_disk); new_ctx->it_name_and_type = std::move(ctx->it_name_and_type); - new_ctx->column_num_for_vertical_merge = std::move(ctx->column_num_for_vertical_merge); new_ctx->read_with_direct_io = std::move(ctx->read_with_direct_io); new_ctx->need_sync = std::move(ctx->need_sync); @@ -510,12 +528,12 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const size_t sum_input_rows_exact = global_ctx->merge_list_element_ptr->rows_read; size_t input_rows_filtered = *global_ctx->input_rows_filtered; - global_ctx->merge_list_element_ptr->columns_written = global_ctx->merging_column_names.size(); + global_ctx->merge_list_element_ptr->columns_written = global_ctx->merging_columns.size(); global_ctx->merge_list_element_ptr->progress.store(ctx->column_sizes->keyColumnsWeight(), std::memory_order_relaxed); - ctx->rows_sources_write_buf->next(); - ctx->rows_sources_uncompressed_write_buf->next(); /// Ensure data has written to disk. 
+ ctx->rows_sources_write_buf->finalize(); + ctx->rows_sources_uncompressed_write_buf->finalize(); ctx->rows_sources_uncompressed_write_buf->finalize(); size_t rows_sources_count = ctx->rows_sources_write_buf->count(); @@ -537,23 +555,21 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const if (!reread_buf) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot read temporary file {}", ctx->rows_sources_uncompressed_write_buf->getFileName()); - auto * reread_buffer_raw = dynamic_cast(reread_buf.get()); + auto * reread_buffer_raw = dynamic_cast(reread_buf.get()); if (!reread_buffer_raw) { const auto & reread_buf_ref = *reread_buf; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ReadBufferFromFile, but got {}", demangle(typeid(reread_buf_ref).name())); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ReadBufferFromFileBase, but got {}", demangle(typeid(reread_buf_ref).name())); } /// Move ownership from std::unique_ptr to std::unique_ptr for CompressedReadBufferFromFile. /// First, release ownership from unique_ptr to base type. reread_buf.release(); /// NOLINT(bugprone-unused-return-value,hicpp-ignored-remove-result): we already have the pointer value in `reread_buffer_raw` + /// Then, move ownership to unique_ptr to concrete type. - std::unique_ptr reread_buffer_from_file(reread_buffer_raw); + std::unique_ptr reread_buffer_from_file(reread_buffer_raw); + /// CompressedReadBufferFromFile expects std::unique_ptr as argument. 
ctx->rows_sources_read_buf = std::make_unique(std::move(reread_buffer_from_file)); - - /// For external cycle - global_ctx->gathering_column_names_size = global_ctx->gathering_column_names.size(); - ctx->column_num_for_vertical_merge = 0; ctx->it_name_and_type = global_ctx->gathering_columns.cbegin(); const auto & settings = global_ctx->context->getSettingsRef(); @@ -636,6 +652,21 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const pipe.addTransform(std::move(transform)); + MergeTreeIndices indexes_to_recalc; + auto indexes_it = global_ctx->skip_indexes_by_column.find(column_name); + + if (indexes_it != global_ctx->skip_indexes_by_column.end()) + { + indexes_to_recalc = MergeTreeIndexFactory::instance().getMany(indexes_it->second); + + pipe.addTransform(std::make_shared( + pipe.getHeader(), + indexes_it->second.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), + global_ctx->data->getContext()))); + + pipe.addTransform(std::make_shared(pipe.getHeader())); + } + ctx->column_parts_pipeline = QueryPipeline(std::move(pipe)); /// Dereference unique_ptr @@ -646,19 +677,16 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const /// Is calculated inside MergeProgressCallback. 
ctx->column_parts_pipeline.disableProfileEventUpdate(); - ctx->executor = std::make_unique(ctx->column_parts_pipeline); + NamesAndTypesList columns_list = {*ctx->it_name_and_type}; ctx->column_to = std::make_unique( global_ctx->new_data_part, global_ctx->metadata_snapshot, - ctx->executor->getHeader(), + columns_list, ctx->compression_codec, - /// we don't need to recalc indices here - /// because all of them were already recalculated and written - /// as key part of vertical merge - std::vector{}, - std::vector{}, /// TODO: think about it + indexes_to_recalc, + getStatisticsForColumns(columns_list, global_ctx->metadata_snapshot), &global_ctx->written_offset_columns, global_ctx->to->getIndexGranularity()); @@ -716,8 +744,7 @@ void MergeTask::VerticalMergeStage::finalizeVerticalMergeForOneColumn() const global_ctx->merge_list_element_ptr->bytes_written_uncompressed += bytes; global_ctx->merge_list_element_ptr->progress.store(ctx->progress_before + ctx->column_sizes->columnWeight(column_name), std::memory_order_relaxed); - /// This is the external cycle increment. - ++ctx->column_num_for_vertical_merge; + /// This is the external loop increment. 
++ctx->it_name_and_type; } @@ -749,9 +776,9 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c LOG_DEBUG(ctx->log, "Merge sorted {} rows, containing {} columns ({} merged, {} gathered) in {} sec., {} rows/sec., {}/sec.", global_ctx->merge_list_element_ptr->rows_read, - global_ctx->all_column_names.size(), - global_ctx->merging_column_names.size(), - global_ctx->gathering_column_names.size(), + global_ctx->storage_columns.size(), + global_ctx->merging_columns.size(), + global_ctx->gathering_columns.size(), elapsed_seconds, global_ctx->merge_list_element_ptr->rows_read / elapsed_seconds, ReadableSize(global_ctx->merge_list_element_ptr->bytes_read_uncompressed / elapsed_seconds)); @@ -888,7 +915,7 @@ bool MergeTask::VerticalMergeStage::executeVerticalMergeForAllColumns() const return false; /// This is the external cycle condition - if (ctx->column_num_for_vertical_merge >= global_ctx->gathering_column_names_size) + if (ctx->it_name_and_type == global_ctx->gathering_columns.end()) return false; switch (ctx->vertical_merge_one_column_state) @@ -976,7 +1003,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() *global_ctx->data, global_ctx->storage_snapshot, part, - global_ctx->merging_column_names, + global_ctx->merging_columns.getNames(), /*mark_ranges=*/ {}, global_ctx->input_rows_filtered, /*apply_deleted_mask=*/ true, @@ -1115,12 +1142,12 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() /// If deduplicate_by_columns is empty, add all columns except virtuals. 
if (global_ctx->deduplicate_by_columns.empty()) { - for (const auto & column_name : global_ctx->merging_column_names) + for (const auto & column : global_ctx->merging_columns) { - if (virtuals.tryGet(column_name, VirtualsKind::Persistent)) + if (virtuals.tryGet(column.name, VirtualsKind::Persistent)) continue; - global_ctx->deduplicate_by_columns.emplace_back(column_name); + global_ctx->deduplicate_by_columns.emplace_back(column.name); } } @@ -1141,11 +1168,13 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() builder->addTransform(std::move(transform)); } - if (global_ctx->metadata_snapshot->hasSecondaryIndices()) + if (!global_ctx->merging_skip_indexes.empty()) { - const auto & indices = global_ctx->metadata_snapshot->getSecondaryIndices(); builder->addTransform(std::make_shared( - builder->getHeader(), indices.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext()))); + builder->getHeader(), + global_ctx->merging_skip_indexes.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), + global_ctx->data->getContext()))); + builder->addTransform(std::make_shared(builder->getHeader())); } diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 1294fa30449..56909d1b7a0 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -24,6 +24,7 @@ #include #include #include +#include namespace DB @@ -137,7 +138,7 @@ private: virtual ~IStage() = default; }; - /// By default this context is uninitialed, but some variables has to be set after construction, + /// By default this context is uninitialized, but some variables has to be set after construction, /// some variables are used in a process of execution /// Proper initialization is responsibility of the author struct GlobalRuntimeContext : public IStageRuntimeContext @@ -164,14 +165,13 @@ private: NamesAndTypesList gathering_columns{}; NamesAndTypesList 
merging_columns{}; - Names gathering_column_names{}; - Names merging_column_names{}; NamesAndTypesList storage_columns{}; - Names all_column_names{}; MergeTreeData::DataPart::Checksums checksums_gathered_columns{}; + IndicesDescription merging_skip_indexes; + std::unordered_map skip_indexes_by_column; + MergeAlgorithm chosen_merge_algorithm{MergeAlgorithm::Undecided}; - size_t gathering_column_names_size{0}; std::unique_ptr horizontal_stage_progress{nullptr}; std::unique_ptr column_progress{nullptr}; @@ -199,7 +199,7 @@ private: using GlobalRuntimeContextPtr = std::shared_ptr; - /// By default this context is uninitialed, but some variables has to be set after construction, + /// By default this context is uninitialized, but some variables has to be set after construction, /// some variables are used in a process of execution /// Proper initialization is responsibility of the author struct ExecuteAndFinalizeHorizontalPartRuntimeContext : public IStageRuntimeContext @@ -232,7 +232,6 @@ private: /// Dependencies for next stages std::list::const_iterator it_name_and_type; - size_t column_num_for_vertical_merge{0}; bool need_sync{false}; }; @@ -260,19 +259,21 @@ private: MergeAlgorithm chooseMergeAlgorithm() const; void createMergedStream(); + void extractMergingAndGatheringColumns() const; void setRuntimeContext(StageRuntimeContextPtr local, StageRuntimeContextPtr global) override { ctx = static_pointer_cast(local); global_ctx = static_pointer_cast(global); } + StageRuntimeContextPtr getContextForNextStage() override; ExecuteAndFinalizeHorizontalPartRuntimeContextPtr ctx; GlobalRuntimeContextPtr global_ctx; }; - /// By default this context is uninitialed, but some variables has to be set after construction, + /// By default this context is uninitialized, but some variables has to be set after construction, /// some variables are used in a process of execution /// Proper initialization is responsibility of the author struct VerticalMergeRuntimeContext : public 
IStageRuntimeContext @@ -284,7 +285,6 @@ private: CompressionCodecPtr compression_codec; TemporaryDataOnDiskPtr tmp_disk{nullptr}; std::list::const_iterator it_name_and_type; - size_t column_num_for_vertical_merge{0}; bool read_with_direct_io{false}; bool need_sync{false}; /// End dependencies from previous stages @@ -348,7 +348,7 @@ private: GlobalRuntimeContextPtr global_ctx; }; - /// By default this context is uninitialed, but some variables has to be set after construction, + /// By default this context is uninitialized, but some variables has to be set after construction, /// some variables are used in a process of execution /// Proper initialization is responsibility of the author struct MergeProjectionsRuntimeContext : public IStageRuntimeContext diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 449b51f9b62..909a8a48bda 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -73,7 +73,7 @@ #include #include #include -#include +#include #include #include #include @@ -283,13 +283,12 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re } } - - // When data path or file not exists, ignore the format_version check + /// When data path or file not exists, ignore the format_version check if (!attach || !read_format_version) { format_version = min_format_version; - // try to write to first non-readonly disk + /// Try to write to first non-readonly disk for (const auto & disk : getStoragePolicy()->getDisks()) { if (disk->isBroken()) @@ -471,10 +470,10 @@ StoragePolicyPtr MergeTreeData::getStoragePolicy() const return storage_policy; } -ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate( +ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByPredicate( const StorageSnapshotPtr & storage_snapshot, const ActionsDAGPtr & filter_dag, ContextPtr local_context) const { - if 
(!local_context->getSettings().allow_statistic_optimize) + if (!local_context->getSettings().allow_statistics_optimize) return {}; const auto & parts = assert_cast(*storage_snapshot->data).parts; @@ -486,23 +485,29 @@ ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate( ASTPtr expression_ast; - ConditionEstimator result; + ConditionSelectivityEstimator result; PartitionPruner partition_pruner(storage_snapshot->metadata, filter_dag, local_context); if (partition_pruner.isUseless()) { /// Read all partitions. for (const auto & part : parts) + try { auto stats = part->loadStatistics(); /// TODO: We only have one stats file for every part. for (const auto & stat : stats) result.merge(part->info.getPartNameV1(), part->rows_count, stat); } + catch (...) + { + tryLogCurrentException(log, fmt::format("while loading statistics on part {}", part->info.getPartNameV1())); + } } else { for (const auto & part : parts) + try { if (!partition_pruner.canBePruned(*part)) { @@ -511,6 +516,10 @@ ConditionEstimator MergeTreeData::getConditionEstimatorByPredicate( result.merge(part->info.getPartNameV1(), part->rows_count, stat); } } + catch (...) 
+ { + tryLogCurrentException(log, fmt::format("while loading statistics on part {}", part->info.getPartNameV1())); + } } return result; @@ -691,8 +700,8 @@ void MergeTreeData::checkProperties( for (const auto & col : new_metadata.columns) { - if (col.stat) - MergeTreeStatisticsFactory::instance().validate(*col.stat, col.type); + if (!col.statistics.empty()) + MergeTreeStatisticsFactory::instance().validate(col.statistics, col.type); } checkKeyExpression(*new_sorting_key.expression, new_sorting_key.sample_block, "Sorting", allow_nullable_key_); @@ -1749,11 +1758,14 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional runner(getActivePartsLoadingThreadPool().get(), "ActiveParts"); + bool all_disks_are_readonly = true; for (size_t i = 0; i < disks.size(); ++i) { const auto & disk_ptr = disks[i]; if (disk_ptr->isBroken()) continue; + if (!disk_ptr->isReadOnly()) + all_disks_are_readonly = false; auto & disk_parts = parts_to_load_by_disk[i]; auto & unexpected_disk_parts = unexpected_parts_to_load_by_disk[i]; @@ -1906,7 +1918,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalrenameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes @@ -1951,7 +1962,8 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional runner(getUnexpectedPartsLoadingThreadPool().get(), "UnexpectedParts"); for (auto & load_state : unexpected_data_parts) @@ -2017,6 +2033,13 @@ void MergeTreeData::loadUnexpectedDataParts() unexpected_data_parts_cv.notify_all(); } } +catch (...) +{ + LOG_ERROR(log, "Loading of unexpected parts failed. " + "Will terminate to avoid undefined behaviour due to inconsistent set of parts. 
" + "Exception: {}", getCurrentExceptionMessage(true)); + std::terminate(); +} void MergeTreeData::loadOutdatedDataParts(bool is_async) try @@ -3469,13 +3492,13 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context new_metadata.getColumns().getPhysical(command.column_name)); const auto & old_column = old_metadata.getColumns().get(command.column_name); - if (old_column.stat) + if (!old_column.statistics.empty()) { const auto & new_column = new_metadata.getColumns().get(command.column_name); if (!old_column.type->equals(*new_column.type)) throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, - "ALTER types of column {} with statistic is not not safe " - "because it can change the representation of statistic", + "ALTER types of column {} with statistics is not not safe " + "because it can change the representation of statistics", backQuoteIfNeed(command.column_name)); } } @@ -7040,7 +7063,7 @@ ActionDAGNodes MergeTreeData::getFiltersForPrimaryKeyAnalysis(const InterpreterS filter_nodes.nodes.push_back(&additional_filter_info->actions->findInOutputs(additional_filter_info->column_name)); if (before_where) - filter_nodes.nodes.push_back(&before_where->findInOutputs(where_column_name)); + filter_nodes.nodes.push_back(&before_where->dag.findInOutputs(where_column_name)); return filter_nodes; } @@ -7051,19 +7074,23 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( const StorageSnapshotPtr &, SelectQueryInfo &) const { - if (query_context->getClientInfo().collaborate_with_initiator) - return QueryProcessingStage::Enum::FetchColumns; - - /// Parallel replicas - if (query_context->canUseParallelReplicasOnInitiator() && to_stage >= QueryProcessingStage::WithMergeableState) + /// with new analyzer, Planner make decision regarding parallel replicas usage, and so about processing stage on reading + if (!query_context->getSettingsRef().allow_experimental_analyzer) { - /// ReplicatedMergeTree - if (supportsReplication()) - 
return QueryProcessingStage::Enum::WithMergeableState; + if (query_context->getClientInfo().collaborate_with_initiator) + return QueryProcessingStage::Enum::FetchColumns; - /// For non-replicated MergeTree we allow them only if parallel_replicas_for_non_replicated_merge_tree is enabled - if (query_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) - return QueryProcessingStage::Enum::WithMergeableState; + /// Parallel replicas + if (query_context->canUseParallelReplicasOnInitiator() && to_stage >= QueryProcessingStage::WithMergeableState) + { + /// ReplicatedMergeTree + if (supportsReplication()) + return QueryProcessingStage::Enum::WithMergeableState; + + /// For non-replicated MergeTree we allow them only if parallel_replicas_for_non_replicated_merge_tree is enabled + if (query_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) + return QueryProcessingStage::Enum::WithMergeableState; + } } return QueryProcessingStage::Enum::FetchColumns; @@ -7086,8 +7113,8 @@ UInt64 MergeTreeData::estimateNumberOfRowsToRead( query_context->getSettingsRef().max_threads); UInt64 total_rows = result_ptr->selected_rows; - if (query_info.limit > 0 && query_info.limit < total_rows) - total_rows = query_info.limit; + if (query_info.trivial_limit > 0 && query_info.trivial_limit < total_rows) + total_rows = query_info.trivial_limit; return total_rows; } @@ -8058,6 +8085,13 @@ void MergeTreeData::checkDropCommandDoesntAffectInProgressMutations(const AlterC throw_exception(mutation_name, "column", command.column_name); } } + else if (command.type == AlterCommand::DROP_STATISTICS) + { + for (const auto & stats_col1 : command.statistics_columns) + for (const auto & stats_col2 : mutation_command.statistics_columns) + if (stats_col1 == stats_col2) + throw_exception(mutation_name, "statistics", stats_col1); + } } } } @@ -8510,7 +8544,7 @@ std::pair MergeTreeData::createE const auto & index_factory = MergeTreeIndexFactory::instance(); 
MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), - Statistics{}, + ColumnsStatistics{}, compression_codec, txn ? txn->tid : Tx::PrehistoricTID); bool sync_on_insert = settings->fsync_after_insert; @@ -8574,6 +8608,38 @@ void MergeTreeData::unloadPrimaryKeys() } } +size_t MergeTreeData::unloadPrimaryKeysOfOutdatedParts() +{ + /// If the method is already called from another thread, then we don't need to do anything. + std::unique_lock lock(unload_primary_key_mutex, std::defer_lock); + if (!lock.try_lock()) + return 0; + + DataPartsVector parts_to_unload_index; + + { + auto parts_lock = lockParts(); + auto parts_range = getDataPartsStateRange(DataPartState::Outdated); + + for (const auto & part : parts_range) + { + /// Outdated part may be hold by SELECT query and still needs the index. + /// This check requires lock of index_mutex but if outdated part is unique then there is no + /// contention on it, so it's relatively cheap and it's ok to check under a global parts lock. 
+ if (part.unique() && part->isIndexLoaded()) + parts_to_unload_index.push_back(part); + } + } + + for (const auto & part : parts_to_unload_index) + { + const_cast(*part).unloadIndex(); + LOG_TEST(log, "Unloaded primary key for outdated part {}", part->name); + } + + return parts_to_unload_index.size(); +} + void MergeTreeData::verifySortingKey(const KeyDescription & sorting_key) { /// Aggregate functions already forbidden, but SimpleAggregateFunction are not diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 440daaf6ced..c8b721038c6 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -426,7 +426,7 @@ public: bool supportsPrewhere() const override { return true; } - ConditionEstimator getConditionEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const override; + ConditionSelectivityEstimator getConditionSelectivityEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const override; bool supportsFinal() const override; @@ -1096,8 +1096,13 @@ public: static VirtualColumnsDescription createVirtuals(const StorageInMemoryMetadata & metadata); + /// Unloads primary keys of all parts. void unloadPrimaryKeys(); + /// Unloads primary keys of outdated parts that are not used by any query. + /// Returns the number of parts for which index was unloaded. 
+ size_t unloadPrimaryKeysOfOutdatedParts(); + protected: friend class IMergeTreeDataPart; friend class MergeTreeDataMergerMutator; @@ -1143,7 +1148,7 @@ protected: struct TagByInfo{}; struct TagByStateAndInfo{}; - void initializeDirectoriesAndFormatVersion(const std::string & relative_data_path_, bool attach, const std::string & date_column_name, bool need_create_directories=true); + void initializeDirectoriesAndFormatVersion(const std::string & relative_data_path_, bool attach, const std::string & date_column_name, bool need_create_directories = true); static const MergeTreePartInfo & dataPartPtrToInfo(const DataPartPtr & part) { @@ -1260,6 +1265,8 @@ protected: std::mutex grab_old_parts_mutex; /// The same for clearOldTemporaryDirectories. std::mutex clear_old_temporary_directories_mutex; + /// The same for unloadPrimaryKeysOfOutdatedParts. + std::mutex unload_primary_key_mutex; void checkProperties( const StorageInMemoryMetadata & new_metadata, diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index f33f4293023..b327480fa92 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -245,6 +245,8 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const writeBinaryLittleEndian(sum.uncompressed_hash, out); } } + + out.finalize(); } void MergeTreeDataPartChecksums::addFile(const String & file_name, UInt64 file_size, MergeTreeDataPartChecksum::uint128 file_hash) diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 4a160e5e229..d628fd6b529 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -59,7 +59,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr & virtual_columns, const 
std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, + const ColumnsStatistics & stats_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 149f86cef00..ee1a9b7f8ed 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -64,7 +64,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, + const ColumnsStatistics & stats_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, @@ -298,6 +298,11 @@ std::optional MergeTreeDataPartWide::getColumnModificationTime(const Str std::optional MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & column) const { std::optional filename; + + /// Fallback for the case when serializations was not loaded yet (called from loadColumns()) + if (getSerializations().empty()) + return getStreamNameForColumn(column, {}, DATA_FILE_EXTENSION, getDataPartStorage()); + getSerialization(column.name)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { if (!filename.has_value()) @@ -309,6 +314,7 @@ std::optional MergeTreeDataPartWide::getFileNameForColumn(const NameAndT filename = getStreamNameForColumn(column, substream_path, DATA_FILE_EXTENSION, getDataPartStorage()); } }); + return filename; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index fb0f0ba9154..21d046c76f2 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ 
b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -20,7 +20,7 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const StorageMetadataPtr & metadata_snapshot_, const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc_, - const Statistics & stats_to_recalc, + const ColumnsStatistics & stats_to_recalc, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index a5527b74e69..b440a37222d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -21,7 +21,7 @@ public: const StorageMetadataPtr & metadata_snapshot_, const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc, + const ColumnsStatistics & stats_to_recalc, const String & marks_file_extension, const CompressionCodecPtr & default_codec, const MergeTreeWriterSettings & settings, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index bcf51bfcd3d..a576720294f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -150,7 +150,7 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( const StorageMetadataPtr & metadata_snapshot_, const VirtualsDescriptionPtr & virtual_columns_, const MergeTreeIndices & indices_to_recalc_, - const Statistics & stats_to_recalc_, + const ColumnsStatistics & stats_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, @@ -265,7 +265,7 @@ void MergeTreeDataPartWriterOnDisk::initStatistics() stats_streams.emplace_back(std::make_unique>( stats_name, 
data_part_storage, - stats_name, STAT_FILE_SUFFIX, + stats_name, STATS_FILE_SUFFIX, default_codec, settings.max_compress_block_size, settings.query_write_settings)); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 0c31cabc8c4..bdf0fdb7f32 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -111,7 +111,7 @@ public: const StorageMetadataPtr & metadata_snapshot_, const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, + const ColumnsStatistics & stats_to_recalc_, const String & marks_file_extension, const CompressionCodecPtr & default_codec, const MergeTreeWriterSettings & settings, @@ -155,7 +155,7 @@ protected: const MergeTreeIndices skip_indices; - const Statistics stats; + const ColumnsStatistics stats; std::vector stats_streams; const String marks_file_extension; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index afa14d8a98a..5ba326cef0c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -86,7 +86,7 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( const StorageMetadataPtr & metadata_snapshot_, const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc_, - const Statistics & stats_to_recalc_, + const ColumnsStatistics & stats_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index 9d18ac76880..ab86ed27c7e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ 
-31,7 +31,7 @@ public: const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, + const ColumnsStatistics & stats_to_recalc_, const String & marks_file_extension, const CompressionCodecPtr & default_codec, const MergeTreeWriterSettings & settings, diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 11058c542a6..2e287ff3042 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -760,9 +760,16 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd CurrentMetrics::MergeTreeDataSelectExecutorThreadsScheduled, num_threads); + + /// Instances of ThreadPool "borrow" threads from the global thread pool. + /// We intentionally use scheduleOrThrow here to avoid a deadlock. + /// For example, queries can already be running with threads from the + /// global pool, and if we saturate max_thread_pool_size whilst requesting + /// more in this loop, queries will block infinitely. + /// So we wait until lock_acquire_timeout, and then raise an exception. 
for (size_t part_index = 0; part_index < parts.size(); ++part_index) { - pool.scheduleOrThrowOnError([&, part_index, thread_group = CurrentThread::getGroup()] + pool.scheduleOrThrow([&, part_index, thread_group = CurrentThread::getGroup()] { setThreadName("MergeTreeIndex"); @@ -774,7 +781,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd CurrentThread::attachToGroupIfDetached(thread_group); process_part(part_index); - }); + }, Priority{}, context->getSettingsRef().lock_acquire_timeout.totalMicroseconds()); } pool.wait(); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 04182062b12..5c8aa32949d 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -469,7 +469,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( if (context->getSettingsRef().materialize_skip_indexes_on_insert) indices = MergeTreeIndexFactory::instance().getMany(metadata_snapshot->getSecondaryIndices()); - Statistics statistics; + ColumnsStatistics statistics; if (context->getSettingsRef().materialize_statistics_on_insert) statistics = MergeTreeStatisticsFactory::instance().getMany(metadata_snapshot->getColumns()); @@ -503,7 +503,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted); } - if (data.getSettings()->allow_experimental_optimized_row_order) + if (data.getSettings()->optimize_row_order + && data.merging_params.mode == MergeTreeData::MergingParams::Mode::Ordinary) /// Nobody knows if this optimization messes up specialized MergeTree engines. 
{ RowOrderOptimizer::optimize(block, sort_description, perm); perm_ptr = &perm; @@ -730,7 +731,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterBlocksAlreadySorted); } - if (data.getSettings()->allow_experimental_optimized_row_order) + if (data.getSettings()->optimize_row_order + && data.merging_params.mode == MergeTreeData::MergingParams::Mode::Ordinary) /// Nobody knows if this optimization messes up specialized MergeTree engines. { RowOrderOptimizer::optimize(block, sort_description, perm); perm_ptr = &perm; @@ -754,7 +756,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( metadata_snapshot, columns, MergeTreeIndices{}, - Statistics{}, /// TODO(hanfei): It should be helpful to write statistics for projection result. + /// TODO(hanfei): It should be helpful to write statistics for projection result. + ColumnsStatistics{}, compression_codec, Tx::PrehistoricTID, false, false, data.getContext()->getWriteSettings()); diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp index 6f46ee0c184..8cf58687125 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp @@ -566,7 +566,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); const auto & value = const_value.get(); - token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter); + token_extractor->substringToBloomFilter(value.data(), value.size(), *out.bloom_filter, true, false); return true; } else if (function_name == "endsWith") @@ -575,7 +575,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); const auto & 
value = const_value.get(); - token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter); + token_extractor->substringToBloomFilter(value.data(), value.size(), *out.bloom_filter, false, true); return true; } else if (function_name == "multiSearchAny" @@ -596,7 +596,15 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( bloom_filters.back().emplace_back(params); const auto & value = element.get(); - token_extractor->stringToBloomFilter(value.data(), value.size(), bloom_filters.back().back()); + + if (function_name == "multiSearchAny") + { + token_extractor->substringToBloomFilter(value.data(), value.size(), bloom_filters.back().back(), false, false); + } + else + { + token_extractor->stringToBloomFilter(value.data(), value.size(), bloom_filters.back().back()); + } } out.set_bloom_filters = std::move(bloom_filters); return true; @@ -625,12 +633,12 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( for (const auto & alternative : alternatives) { bloom_filters.back().emplace_back(params); - token_extractor->stringToBloomFilter(alternative.data(), alternative.size(), bloom_filters.back().back()); + token_extractor->substringToBloomFilter(alternative.data(), alternative.size(), bloom_filters.back().back(), false, false); } out.set_bloom_filters = std::move(bloom_filters); } else - token_extractor->stringToBloomFilter(required_substring.data(), required_substring.size(), *out.bloom_filter); + token_extractor->substringToBloomFilter(required_substring.data(), required_substring.size(), *out.bloom_filter, false, false); return true; } diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index af9ee710f88..47ce24b91eb 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -595,7 +595,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = 
std::make_unique(params); const auto & value = const_value.get(); - token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); + token_extractor->substringToGinFilter(value.data(), value.size(), *out.gin_filter, true, false); return true; } else if (function_name == "endsWith") @@ -604,7 +604,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); const auto & value = const_value.get(); - token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); + token_extractor->substringToGinFilter(value.data(), value.size(), *out.gin_filter, false, true); return true; } else if (function_name == "multiSearchAny") @@ -622,7 +622,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( gin_filters.back().emplace_back(params); const auto & value = element.get(); - token_extractor->stringToGinFilter(value.data(), value.size(), gin_filters.back().back()); + token_extractor->substringToGinFilter(value.data(), value.size(), gin_filters.back().back(), false, false); } out.set_gin_filters = std::move(gin_filters); return true; @@ -650,14 +650,14 @@ bool MergeTreeConditionFullText::traverseASTEquals( for (const auto & alternative : alternatives) { gin_filters.back().emplace_back(params); - token_extractor->stringToGinFilter(alternative.data(), alternative.size(), gin_filters.back().back()); + token_extractor->substringToGinFilter(alternative.data(), alternative.size(), gin_filters.back().back(), false, false); } out.set_gin_filters = std::move(gin_filters); } else { out.gin_filter = std::make_unique(params); - token_extractor->stringToGinFilter(required_substring.data(), required_substring.size(), *out.gin_filter); + token_extractor->substringToGinFilter(required_substring.data(), required_substring.size(), *out.gin_filter, false, false); } return true; @@ -742,6 +742,7 @@ bool MergeTreeConditionFullText::tryPrepareSetGinFilter( MergeTreeIndexGranulePtr 
MergeTreeIndexFullText::createIndexGranule() const { + /// ------ /// Index type 'inverted' was renamed to 'full_text' in May 2024. /// Tables with old indexes can be loaded during a transition period. We still want let users know that they should drop existing /// indexes and re-create them. Function `createIndexGranule` is called whenever the index is used by queries. Reject the query if we @@ -749,6 +750,7 @@ MergeTreeIndexGranulePtr MergeTreeIndexFullText::createIndexGranule() const /// TODO: remove this at the end of 2024. if (index.type == INVERTED_INDEX_NAME) throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indexes of type 'inverted' are no longer supported. Please drop and recreate the index as type 'full-text'"); + /// ------ return std::make_shared(index.name, index.column_names.size(), params); } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 026a1da7196..1f8d6abebd2 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -81,6 +81,8 @@ struct Settings; M(UInt64, min_delay_to_mutate_ms, 10, "Min delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ M(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ M(Bool, exclude_deleted_rows_for_part_size_in_merge, false, "Use an estimated source part size (excluding lightweight deleted rows) when selecting parts to merge", 0) \ + M(String, merge_workload, "", "Name of workload to be used to access resources for merges", 0) \ + M(String, mutation_workload, "", "Name of workload to be used to access resources for mutations", 0) \ \ /** Inserts settings. */ \ M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. 
Disabled if set to 0", 0) \ @@ -94,6 +96,7 @@ struct Settings; M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background.", 0) \ M(Bool, add_implicit_sign_column_constraint_for_collapsing_engine, false, "If true, add implicit constraint for sign column for CollapsingMergeTree engine.", 0) \ M(Milliseconds, sleep_before_commit_local_part_in_replicated_table_ms, 0, "For testing. Do not change it.", 0) \ + M(Bool, optimize_row_order, false, "Allow reshuffling of rows during part inserts and merges to improve the compressibility of the new part", 0) \ \ /* Part removal settings. */ \ M(UInt64, simultaneous_parts_removal_limit, 0, "Maximum number of parts to remove during one CleanupThread iteration (0 means unlimited).", 0) \ @@ -199,7 +202,6 @@ struct Settings; M(Bool, cache_populated_by_fetch, false, "Only available in ClickHouse Cloud", 0) \ M(Bool, force_read_through_cache_for_merges, false, "Force read-through filesystem cache for merges", 0) \ M(Bool, allow_experimental_replacing_merge_with_cleanup, false, "Allow experimental CLEANUP merges for ReplacingMergeTree with is_deleted column.", 0) \ - M(Bool, allow_experimental_optimized_row_order, false, "Allow reshuffling of rows during part inserts and merges to improve the compressibility of the new part", 0) \ \ /** Compress marks and primary key. 
*/ \ M(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index b7dede3cb00..05751e0fa6f 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -54,6 +54,10 @@ void MergeTreeSink::onFinish() finishDelayedChunk(); } +void MergeTreeSink::onCancel() +{ +} + void MergeTreeSink::consume(Chunk chunk) { if (num_blocks_processed > 0) diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 07ab3850df2..cf6715a3415 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -28,6 +28,7 @@ public: void consume(Chunk chunk) override; void onStart() override; void onFinish() override; + void onCancel() override; private: StorageMergeTree & storage; diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp b/src/Storages/MergeTree/MergeTreeSource.cpp index fcf2dd76e3f..e323b9f9ee7 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -133,9 +133,8 @@ private: }; #endif -MergeTreeSource::MergeTreeSource(MergeTreeSelectProcessorPtr processor_) - : ISource(processor_->getHeader()) - , processor(std::move(processor_)) +MergeTreeSource::MergeTreeSource(MergeTreeSelectProcessorPtr processor_, const std::string & log_name_) + : ISource(processor_->getHeader()), processor(std::move(processor_)), log_name(log_name_) { #if defined(OS_LINUX) if (processor->getSettings().use_asynchronous_read_from_pool) @@ -207,7 +206,7 @@ std::optional MergeTreeSource::tryGenerate() try { - OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; + OpenTelemetry::SpanHolder span{fmt::format("MergeTreeSource({})::tryGenerate", log_name)}; holder->setResult(processor->read()); } catch (...) 
@@ -222,7 +221,7 @@ std::optional MergeTreeSource::tryGenerate() } #endif - OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; + OpenTelemetry::SpanHolder span{fmt::format("MergeTreeSource({})::tryGenerate", log_name)}; return processReadResult(processor->read()); } diff --git a/src/Storages/MergeTree/MergeTreeSource.h b/src/Storages/MergeTree/MergeTreeSource.h index 655f0ee6ebe..fc39b4f9b09 100644 --- a/src/Storages/MergeTree/MergeTreeSource.h +++ b/src/Storages/MergeTree/MergeTreeSource.h @@ -12,7 +12,7 @@ struct ChunkAndProgress; class MergeTreeSource final : public ISource { public: - explicit MergeTreeSource(MergeTreeSelectProcessorPtr processor_); + explicit MergeTreeSource(MergeTreeSelectProcessorPtr processor_, const std::string & log_name_); ~MergeTreeSource() override; std::string getName() const override; @@ -30,6 +30,7 @@ protected: private: MergeTreeSelectProcessorPtr processor; + const std::string log_name; #if defined(OS_LINUX) struct AsyncReadingState; diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 3844ac18268..a9a5fddace4 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -53,7 +53,7 @@ static Int64 findMinPosition(const NameSet & condition_table_columns, const Name MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( std::unordered_map column_sizes_, const StorageMetadataPtr & metadata_snapshot, - const ConditionEstimator & estimator_, + const ConditionSelectivityEstimator & estimator_, const Names & queried_columns_, const std::optional & supported_columns_, LoggerPtr log_) @@ -92,7 +92,7 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef().move_all_conditions_to_prewhere; where_optimizer_context.move_primary_key_columns_to_end_of_prewhere = 
context->getSettingsRef().move_primary_key_columns_to_end_of_prewhere; where_optimizer_context.is_final = select.final(); - where_optimizer_context.use_statistic = context->getSettingsRef().allow_statistic_optimize; + where_optimizer_context.use_statistics = context->getSettingsRef().allow_statistics_optimize; RPNBuilderTreeContext tree_context(context, std::move(block_with_constants), {} /*prepared_sets*/); RPNBuilderTreeNode node(select.where().get(), tree_context); @@ -123,7 +123,7 @@ MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::op where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef().move_all_conditions_to_prewhere; where_optimizer_context.move_primary_key_columns_to_end_of_prewhere = context->getSettingsRef().move_primary_key_columns_to_end_of_prewhere; where_optimizer_context.is_final = is_final; - where_optimizer_context.use_statistic = context->getSettingsRef().allow_statistic_optimize; + where_optimizer_context.use_statistics = context->getSettingsRef().allow_statistics_optimize; RPNBuilderTreeContext tree_context(context); RPNBuilderTreeNode node(&filter_dag->findInOutputs(filter_column_name), tree_context); @@ -273,15 +273,17 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree /// Do not move conditions involving all queried columns. 
&& cond.table_columns.size() < queried_columns.size(); - if (where_optimizer_context.use_statistic) + if (cond.viable) + cond.good = isConditionGood(node, table_columns); + + if (where_optimizer_context.use_statistics) { cond.good = cond.viable; - cond.selectivity = estimator.estimateSelectivity(node); - LOG_TEST(log, "Condition {} has selectivity {}", node.getColumnName(), cond.selectivity); - } - else if (cond.viable) - { - cond.good = isConditionGood(node, table_columns); + + cond.estimated_row_count = estimator.estimateRowCount(node); + + if (node.getASTNode() != nullptr) + LOG_DEBUG(log, "Condition {} has estimated row count {}", node.getASTNode()->dumpTree(), cond.estimated_row_count); } if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere) diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 6c5ff29bc76..ba6b4660924 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include @@ -38,7 +38,7 @@ public: MergeTreeWhereOptimizer( std::unordered_map column_sizes_, const StorageMetadataPtr & metadata_snapshot, - const ConditionEstimator & estimator_, + const ConditionSelectivityEstimator & estimator_, const Names & queried_columns_, const std::optional & supported_columns_, LoggerPtr log_); @@ -76,7 +76,7 @@ private: bool good = false; /// the lower the better - Float64 selectivity = 1.0; + Float64 estimated_row_count = 0; /// Does the condition contain primary key column? 
/// If so, it is better to move it further to the end of PREWHERE chain depending on minimal position in PK of any @@ -85,7 +85,7 @@ private: auto tuple() const { - return std::make_tuple(!viable, !good, -min_position_in_primary_key, selectivity, columns_size, table_columns.size()); + return std::make_tuple(!viable, !good, -min_position_in_primary_key, estimated_row_count, columns_size, table_columns.size()); } /// Is condition a better candidate for moving to PREWHERE? @@ -104,7 +104,7 @@ private: bool move_all_conditions_to_prewhere = false; bool move_primary_key_columns_to_end_of_prewhere = false; bool is_final = false; - bool use_statistic = false; + bool use_statistics = false; }; struct OptimizeResult @@ -147,7 +147,7 @@ private: static NameSet determineArrayJoinedNames(const ASTSelectQuery & select); - const ConditionEstimator estimator; + const ConditionSelectivityEstimator estimator; const NameSet table_columns; const Names queried_columns; diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index c5799fab09f..164658c914e 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -20,7 +20,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list_, const MergeTreeIndices & skip_indices, - const Statistics & statistics, + const ColumnsStatistics & statistics, CompressionCodecPtr default_codec_, TransactionID tid, bool reset_columns_, diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index c1e3d75fefc..e212fe5bb5a 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -20,7 +20,7 @@ public: const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list_, const MergeTreeIndices & skip_indices, - const 
Statistics & statistics, + const ColumnsStatistics & statistics, CompressionCodecPtr default_codec_, TransactionID tid, bool reset_columns_ = false, diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 674a9bd498f..5ae6517a236 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -13,15 +13,14 @@ namespace ErrorCodes MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( const MergeTreeMutableDataPartPtr & data_part, const StorageMetadataPtr & metadata_snapshot_, - const Block & header_, + const NamesAndTypesList & columns_list_, CompressionCodecPtr default_codec, const MergeTreeIndices & indices_to_recalc, - const Statistics & stats_to_recalc_, + const ColumnsStatistics & stats_to_recalc_, WrittenOffsetColumns * offset_columns_, const MergeTreeIndexGranularity & index_granularity, const MergeTreeIndexGranularityInfo * index_granularity_info) - : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, header_.getNamesAndTypesList(), /*reset_columns=*/ true) - , header(header_) + : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, columns_list_, /*reset_columns=*/ true) { const auto & global_settings = data_part->storage.getContext()->getSettings(); @@ -37,7 +36,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), data_part_storage, data_part->index_granularity_info, storage_settings, - header.getNamesAndTypesList(), + columns_list_, data_part->getColumnPositions(), metadata_snapshot_, data_part->storage.getVirtualsPtr(), diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h index ad3cabe459e..e837a62743e 100644 --- 
a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h @@ -17,24 +17,20 @@ public: MergedColumnOnlyOutputStream( const MergeTreeMutableDataPartPtr & data_part, const StorageMetadataPtr & metadata_snapshot_, - const Block & header_, + const NamesAndTypesList & columns_list_, CompressionCodecPtr default_codec_, const MergeTreeIndices & indices_to_recalc_, - const Statistics & stats_to_recalc_, + const ColumnsStatistics & stats_to_recalc_, WrittenOffsetColumns * offset_columns_ = nullptr, const MergeTreeIndexGranularity & index_granularity = {}, const MergeTreeIndexGranularityInfo * index_granularity_info_ = nullptr); - Block getHeader() const { return header; } void write(const Block & block) override; MergeTreeData::DataPart::Checksums fillChecksums(MergeTreeData::MutableDataPartPtr & new_part, MergeTreeData::DataPart::Checksums & all_checksums); void finish(bool sync); - -private: - Block header; }; using MergedColumnOnlyOutputStreamPtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 8d40658bb2c..4c96cbf2c97 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -204,7 +204,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() } task_context = Context::createCopy(storage.getContext()); - task_context->makeQueryContext(); + task_context->makeQueryContextForMutate(*storage.getSettings()); task_context->setCurrentQueryId(getQueryId()); task_context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION); diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 2fd02708421..20f387137e7 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -136,7 +136,7 @@ 
bool MutatePlainMergeTreeTask::executeStep() ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const { auto context = Context::createCopy(storage.getContext()); - context->makeQueryContext(); + context->makeQueryContextForMutate(*storage.getSettings()); auto queryId = getQueryId(); context->setCurrentQueryId(queryId); context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 1828b8a7eeb..a552ee89aee 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -29,6 +29,7 @@ #include #include #include +#include namespace ProfileEvents @@ -130,7 +131,7 @@ static void splitAndModifyMutationCommands( } } if (command.type == MutationCommand::Type::MATERIALIZE_INDEX - || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC + || command.type == MutationCommand::Type::MATERIALIZE_STATISTICS || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_TTL || command.type == MutationCommand::Type::DELETE @@ -143,7 +144,7 @@ static void splitAndModifyMutationCommands( } else if (command.type == MutationCommand::Type::DROP_INDEX || command.type == MutationCommand::Type::DROP_PROJECTION - || command.type == MutationCommand::Type::DROP_STATISTIC) + || command.type == MutationCommand::Type::DROP_STATISTICS) { for_file_renames.push_back(command); } @@ -258,7 +259,7 @@ static void splitAndModifyMutationCommands( for_interpreter.push_back(command); } else if (command.type == MutationCommand::Type::MATERIALIZE_INDEX - || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC + || command.type == MutationCommand::Type::MATERIALIZE_STATISTICS || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == 
MutationCommand::Type::MATERIALIZE_TTL || command.type == MutationCommand::Type::DELETE @@ -269,7 +270,7 @@ static void splitAndModifyMutationCommands( } else if (command.type == MutationCommand::Type::DROP_INDEX || command.type == MutationCommand::Type::DROP_PROJECTION - || command.type == MutationCommand::Type::DROP_STATISTIC) + || command.type == MutationCommand::Type::DROP_STATISTICS) { for_file_renames.push_back(command); } @@ -532,16 +533,16 @@ static ExecuteTTLType shouldExecuteTTL(const StorageMetadataPtr & metadata_snaps return has_ttl_expression ? ExecuteTTLType::RECALCULATE : ExecuteTTLType::NONE; } -static std::set getStatisticsToRecalculate(const StorageMetadataPtr & metadata_snapshot, const NameSet & materialized_stats) +static std::set getStatisticsToRecalculate(const StorageMetadataPtr & metadata_snapshot, const NameSet & materialized_stats) { const auto & stats_factory = MergeTreeStatisticsFactory::instance(); - std::set stats_to_recalc; + std::set stats_to_recalc; const auto & columns = metadata_snapshot->getColumns(); for (const auto & col_desc : columns) { - if (col_desc.stat && materialized_stats.contains(col_desc.name)) + if (!col_desc.statistics.empty() && materialized_stats.contains(col_desc.name)) { - stats_to_recalc.insert(stats_factory.get(*col_desc.stat)); + stats_to_recalc.insert(stats_factory.get(col_desc.statistics)); } } return stats_to_recalc; @@ -655,7 +656,7 @@ static NameSet collectFilesToSkip( const std::set & indices_to_recalc, const String & mrk_extension, const std::set & projections_to_recalc, - const std::set & stats_to_recalc) + const std::set & stats_to_recalc) { NameSet files_to_skip = source_part->getFileNamesWithoutChecksums(); @@ -683,7 +684,7 @@ static NameSet collectFilesToSkip( files_to_skip.insert(projection->getDirectoryName()); for (const auto & stat : stats_to_recalc) - files_to_skip.insert(stat->getFileName() + STAT_FILE_SUFFIX); + files_to_skip.insert(stat->getFileName() + STATS_FILE_SUFFIX); if 
(isWidePart(source_part)) { @@ -772,11 +773,11 @@ static NameToNameVector collectFilesForRenames( if (source_part->checksums.has(command.column_name + ".proj")) add_rename(command.column_name + ".proj", ""); } - else if (command.type == MutationCommand::Type::DROP_STATISTIC) + else if (command.type == MutationCommand::Type::DROP_STATISTICS) { - for (const auto & statistic_column_name : command.statistic_columns) - if (source_part->checksums.has(STAT_FILE_PREFIX + statistic_column_name + STAT_FILE_SUFFIX)) - add_rename(STAT_FILE_PREFIX + statistic_column_name + STAT_FILE_SUFFIX, ""); + for (const auto & statistics_column_name : command.statistics_columns) + if (source_part->checksums.has(STATS_FILE_PREFIX + statistics_column_name + STATS_FILE_SUFFIX)) + add_rename(STATS_FILE_PREFIX + statistics_column_name + STATS_FILE_SUFFIX, ""); } else if (isWidePart(source_part)) { @@ -797,9 +798,9 @@ static NameToNameVector collectFilesForRenames( if (auto serialization = source_part->tryGetSerialization(command.column_name)) serialization->enumerateStreams(callback); - /// if we drop a column with statistic, we should also drop the stat file. - if (source_part->checksums.has(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) - add_rename(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX, ""); + /// if we drop a column with statistics, we should also drop the stat file. + if (source_part->checksums.has(STATS_FILE_PREFIX + command.column_name + STATS_FILE_SUFFIX)) + add_rename(STATS_FILE_PREFIX + command.column_name + STATS_FILE_SUFFIX, ""); } else if (command.type == MutationCommand::Type::RENAME_COLUMN) { @@ -833,9 +834,9 @@ static NameToNameVector collectFilesForRenames( if (auto serialization = source_part->tryGetSerialization(command.column_name)) serialization->enumerateStreams(callback); - /// if we rename a column with statistic, we should also rename the stat file. 
- if (source_part->checksums.has(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) - add_rename(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX, STAT_FILE_PREFIX + command.rename_to + STAT_FILE_SUFFIX); + /// if we rename a column with statistics, we should also rename the stat file. + if (source_part->checksums.has(STATS_FILE_PREFIX + command.column_name + STATS_FILE_SUFFIX)) + add_rename(STATS_FILE_PREFIX + command.column_name + STATS_FILE_SUFFIX, STATS_FILE_PREFIX + command.rename_to + STATS_FILE_SUFFIX); } else if (command.type == MutationCommand::Type::READ_COLUMN) { @@ -1022,7 +1023,7 @@ struct MutationContext IMergeTreeDataPart::MinMaxIndexPtr minmax_idx; std::set indices_to_recalc; - std::set stats_to_recalc; + std::set stats_to_recalc; std::set projections_to_recalc; MergeTreeData::DataPart::Checksums existing_indices_stats_checksums; NameSet files_to_skip; @@ -1267,7 +1268,7 @@ private: ProjectionNameToItsBlocks projection_parts; std::move_iterator projection_parts_iterator; - std::vector projection_squashes; + std::vector projection_squashes; const ProjectionsDescription & projections; ExecutableTaskPtr merge_projection_parts_task_ptr; @@ -1286,7 +1287,7 @@ void PartMergerWriter::prepare() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { // We split the materialization into multiple stages similar to the process of INSERT SELECT query. 
- projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + projection_squashes.emplace_back(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } existing_rows_count = 0; @@ -1311,16 +1312,18 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; - Block projection_block; - { - ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); - projection_block = projection_squashes[i].add(projection.calculate(cur_block, ctx->context)); - } + ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); + Block block_to_squash = projection.calculate(cur_block, ctx->context); + projection_squashes[i].header = block_to_squash; + Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); - if (projection_block) + if (planned_chunk.hasChunkInfo()) { + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = block_to_squash.cloneWithColumns(projection_chunk.getColumns()); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); + *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); tmp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(tmp_part.part)); @@ -1338,12 +1341,15 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { const auto & projection = *ctx->projections_to_build[i]; - auto & projection_squash = projection_squashes[i]; - auto projection_block = projection_squash.add({}); - if (projection_block) + auto & projection_squash_plan = projection_squashes[i]; + 
auto planned_chunk = projection_squash_plan.flush(); + if (planned_chunk.hasChunkInfo()) { + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = projection_squash_plan.header.cloneWithColumns(projection_chunk.getColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); + *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); temp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(temp_part.part)); @@ -1473,12 +1479,12 @@ private: { if (command.type == MutationCommand::DROP_INDEX) removed_indices.insert(command.column_name); - else if (command.type == MutationCommand::DROP_STATISTIC) - for (const auto & column_name : command.statistic_columns) + else if (command.type == MutationCommand::DROP_STATISTICS) + for (const auto & column_name : command.statistics_columns) removed_stats.insert(column_name); else if (command.type == MutationCommand::RENAME_COLUMN - && ctx->source_part->checksums.files.contains(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) - renamed_stats[STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX] = STAT_FILE_PREFIX + command.rename_to + STAT_FILE_SUFFIX; + && ctx->source_part->checksums.files.contains(STATS_FILE_PREFIX + command.column_name + STATS_FILE_SUFFIX)) + renamed_stats[STATS_FILE_PREFIX + command.column_name + STATS_FILE_SUFFIX] = STATS_FILE_PREFIX + command.rename_to + STATS_FILE_SUFFIX; } bool is_full_part_storage = isFullPartStorage(ctx->new_data_part->getDataPartStorage()); @@ -1514,23 +1520,23 @@ private: } } - Statistics stats_to_rewrite; + ColumnsStatistics stats_to_rewrite; const auto & columns = ctx->metadata_snapshot->getColumns(); for (const auto & col : columns) { - if (!col.stat || removed_stats.contains(col.name)) + if (col.statistics.empty() || 
removed_stats.contains(col.name)) continue; if (ctx->materialized_statistics.contains(col.name)) { - stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(*col.stat)); + stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col.statistics)); } else { /// We do not hard-link statistics which - /// 1. In `DROP STATISTIC` statement. It is filtered by `removed_stats` + /// 1. In `DROP STATISTICS` statement. It is filtered by `removed_stats` /// 2. Not in column list anymore, including `DROP COLUMN`. It is not touched by this loop. - String stat_file_name = STAT_FILE_PREFIX + col.name + STAT_FILE_SUFFIX; + String stat_file_name = STATS_FILE_PREFIX + col.name + STATS_FILE_SUFFIX; auto it = ctx->source_part->checksums.files.find(stat_file_name); if (it != ctx->source_part->checksums.files.end()) { @@ -1901,10 +1907,10 @@ private: ctx->out = std::make_shared( ctx->new_data_part, ctx->metadata_snapshot, - ctx->updated_header, + ctx->updated_header.getNamesAndTypesList(), ctx->compression_codec, std::vector(ctx->indices_to_recalc.begin(), ctx->indices_to_recalc.end()), - Statistics(ctx->stats_to_recalc.begin(), ctx->stats_to_recalc.end()), + ColumnsStatistics(ctx->stats_to_recalc.begin(), ctx->stats_to_recalc.end()), nullptr, ctx->source_part->index_granularity, &ctx->source_part->index_granularity_info diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index f3318a48883..5a84c6fd684 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -112,7 +112,7 @@ struct fmt::formatter static constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); } template - auto format(const DB::Part & part, FormatContext & ctx) + auto format(const DB::Part & part, FormatContext & ctx) const { return fmt::format_to(ctx.out(), "{} in replicas [{}]", part.description.describe(), 
fmt::join(part.replicas, ", ")); } @@ -125,6 +125,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; +extern const int ALL_CONNECTION_TRIES_FAILED; } class ParallelReplicasReadingCoordinator::ImplInterface @@ -1025,7 +1026,11 @@ void ParallelReplicasReadingCoordinator::markReplicaAsUnavailable(size_t replica std::lock_guard lock(mutex); if (!pimpl) + { unavailable_nodes_registered_before_initialization.push_back(replica_number); + if (unavailable_nodes_registered_before_initialization.size() == replicas_count) + throw Exception(ErrorCodes::ALL_CONNECTION_TRIES_FAILED, "Can't connect to any replica chosen for query execution"); + } else pimpl->markReplicaAsUnavailable(replica_number); } diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h index 60343988f03..8b463fda395 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h @@ -34,7 +34,7 @@ private: void initialize(CoordinationMode mode); std::mutex mutex; - size_t replicas_count{0}; + const size_t replicas_count{0}; size_t mark_segment_size{0}; std::unique_ptr pimpl; ProgressCallback progress_callback; // store the callback only to bypass it to coordinator implementation diff --git a/src/Storages/MergeTree/RangesInDataPart.cpp b/src/Storages/MergeTree/RangesInDataPart.cpp index c46385e84ef..50e0781b4e6 100644 --- a/src/Storages/MergeTree/RangesInDataPart.cpp +++ b/src/Storages/MergeTree/RangesInDataPart.cpp @@ -13,7 +13,7 @@ struct fmt::formatter static constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); } template - auto format(const DB::RangesInDataPartDescription & range, FormatContext & ctx) + auto format(const DB::RangesInDataPartDescription & range, FormatContext & ctx) const { return fmt::format_to(ctx.out(), "{}", range.describe()); } diff --git 
a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index e034918ef57..328c03a5b94 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -175,6 +175,8 @@ Float32 ReplicatedMergeTreeCleanupThread::iterate() cleaned_part_like += storage.clearEmptyParts(); } + cleaned_part_like += storage.unloadPrimaryKeysOfOutdatedParts(); + /// We need to measure the number of removed objects somehow (for better scheduling), /// but just summing the number of removed async blocks, logs, and empty parts does not make any sense. /// So we are trying to (approximately) measure the number of inserted blocks/parts, so we will be able to compare apples to apples. diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 9a368bd44f5..30ba95c46f0 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -2004,7 +2004,8 @@ MutationCommands ReplicatedMergeTreeQueue::getMutationCommands( MutationCommands commands; for (auto it = begin; it != end; ++it) { - /// FIXME uncomment this assertion after relesing 23.5 (currently it fails in Upgrade check) + /// FIXME : This was supposed to be fixed after releasing 23.5 (it fails in Upgrade check) + /// but it's still present https://github.com/ClickHouse/ClickHouse/issues/65275 /// chassert(mutation_pointer < it->second->entry->znode_name); mutation_ids.push_back(it->second->entry->znode_name); const auto & commands_from_entry = it->second->entry->commands; diff --git a/src/Storages/MergeTree/RowOrderOptimizer.cpp b/src/Storages/MergeTree/RowOrderOptimizer.cpp index 34f9fed4500..76b0d6452ad 100644 --- a/src/Storages/MergeTree/RowOrderOptimizer.cpp +++ b/src/Storages/MergeTree/RowOrderOptimizer.cpp @@ -78,9 +78,8 @@ std::vector getOtherColumnIndexes(const Block & 
block, const SortDescrip /// -------- /// 2 1 a 3 /// ---------------------- -EqualRanges getEqualRanges(const Block & block, const SortDescription & sort_description, const IColumn::Permutation & permutation, const LoggerPtr & log) +EqualRanges getEqualRanges(const Block & block, const SortDescription & sort_description, const IColumn::Permutation & permutation) { - LOG_TRACE(log, "Finding equal ranges"); EqualRanges ranges; const size_t rows = block.rows(); if (sort_description.empty()) @@ -122,11 +121,10 @@ void updatePermutationInEqualRange( const std::vector & other_column_indexes, IColumn::Permutation & permutation, const EqualRange & equal_range, - const std::vector & cardinalities) + const std::vector & cardinalities, + const LoggerPtr & log) { - LoggerPtr log = getLogger("RowOrderOptimizer"); - - LOG_TRACE(log, "Starting optimization in equal range"); + LOG_TEST(log, "Starting optimization in equal range"); std::vector column_order(other_column_indexes.size()); iota(column_order.begin(), column_order.end(), 0); @@ -134,17 +132,17 @@ void updatePermutationInEqualRange( stable_sort(column_order.begin(), column_order.end(), cmp); std::vector ranges = {equal_range}; - LOG_TRACE(log, "equal_range: .from: {}, .to: {}", equal_range.from, equal_range.to); + LOG_TEST(log, "equal_range: .from: {}, .to: {}", equal_range.from, equal_range.to); for (size_t i : column_order) { const size_t column_id = other_column_indexes[i]; const ColumnPtr & column = block.getByPosition(column_id).column; - LOG_TRACE(log, "i: {}, column_id: {}, column->getName(): {}, cardinality: {}", i, column_id, column->getName(), cardinalities[i]); + LOG_TEST(log, "i: {}, column_id: {}, column type: {}, cardinality: {}", i, column_id, column->getName(), cardinalities[i]); column->updatePermutation( IColumn::PermutationSortDirection::Ascending, IColumn::PermutationSortStability::Stable, 0, 1, permutation, ranges); } - LOG_TRACE(log, "Finish optimization in equal range"); + LOG_TEST(log, "Finish 
optimization in equal range"); } } @@ -156,7 +154,10 @@ void RowOrderOptimizer::optimize(const Block & block, const SortDescription & so LOG_TRACE(log, "Starting optimization"); if (block.columns() == 0) + { + LOG_TRACE(log, "Finished optimization (block has no columns)"); return; /// a table without columns, this should not happen in the first place ... + } if (permutation.empty()) { @@ -165,17 +166,17 @@ void RowOrderOptimizer::optimize(const Block & block, const SortDescription & so iota(permutation.data(), rows, IColumn::Permutation::value_type(0)); } - const EqualRanges equal_ranges = getEqualRanges(block, sort_description, permutation, log); + const EqualRanges equal_ranges = getEqualRanges(block, sort_description, permutation); const std::vector other_columns_indexes = getOtherColumnIndexes(block, sort_description); - LOG_TRACE(log, "block.columns(): {}, block.rows(): {}, sort_description.size(): {}, equal_ranges.size(): {}", block.columns(), block.rows(), sort_description.size(), equal_ranges.size()); + LOG_TRACE(log, "columns: {}, sorting key columns: {}, rows: {}, equal ranges: {}", block.columns(), sort_description.size(), block.rows(), equal_ranges.size()); for (const auto & equal_range : equal_ranges) { if (equal_range.size() <= 1) continue; const std::vector cardinalities = getCardinalitiesInPermutedRange(block, other_columns_indexes, permutation, equal_range); - updatePermutationInEqualRange(block, other_columns_indexes, permutation, equal_range, cardinalities); + updatePermutationInEqualRange(block, other_columns_indexes, permutation, equal_range, cardinalities, log); } LOG_TRACE(log, "Finished optimization"); diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index aaf5c1b5d87..f736c863eee 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -83,15 +83,15 @@ std::optional 
MutationCommand::parse(ASTAlterCommand * command, res.index_name = command->index->as().name(); return res; } - else if (command->type == ASTAlterCommand::MATERIALIZE_STATISTIC) + else if (command->type == ASTAlterCommand::MATERIALIZE_STATISTICS) { MutationCommand res; res.ast = command->ptr(); - res.type = MATERIALIZE_STATISTIC; + res.type = MATERIALIZE_STATISTICS; if (command->partition) res.partition = command->partition->clone(); res.predicate = nullptr; - res.statistic_columns = command->statistic_decl->as().getColumnNames(); + res.statistics_columns = command->statistics_decl->as().getColumnNames(); return res; } else if (command->type == ASTAlterCommand::MATERIALIZE_PROJECTION) @@ -150,16 +150,16 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.clear = true; return res; } - else if (parse_alter_commands && command->type == ASTAlterCommand::DROP_STATISTIC) + else if (parse_alter_commands && command->type == ASTAlterCommand::DROP_STATISTICS) { MutationCommand res; res.ast = command->ptr(); - res.type = MutationCommand::Type::DROP_STATISTIC; + res.type = MutationCommand::Type::DROP_STATISTICS; if (command->partition) res.partition = command->partition->clone(); if (command->clear_index) res.clear = true; - res.statistic_columns = command->statistic_decl->as().getColumnNames(); + res.statistics_columns = command->statistics_decl->as().getColumnNames(); return res; } else if (parse_alter_commands && command->type == ASTAlterCommand::DROP_PROJECTION) diff --git a/src/Storages/MutationCommands.h b/src/Storages/MutationCommands.h index 6e10f7d9b2d..f999aab1f4d 100644 --- a/src/Storages/MutationCommands.h +++ b/src/Storages/MutationCommands.h @@ -30,12 +30,12 @@ struct MutationCommand UPDATE, MATERIALIZE_INDEX, MATERIALIZE_PROJECTION, - MATERIALIZE_STATISTIC, + MATERIALIZE_STATISTICS, READ_COLUMN, /// Read column and apply conversions (MODIFY COLUMN alter query). 
DROP_COLUMN, DROP_INDEX, DROP_PROJECTION, - DROP_STATISTIC, + DROP_STATISTICS, MATERIALIZE_TTL, RENAME_COLUMN, MATERIALIZE_COLUMN, @@ -51,10 +51,11 @@ struct MutationCommand /// Columns with corresponding actions std::unordered_map column_to_update_expression = {}; - /// For MATERIALIZE INDEX and PROJECTION and STATISTIC + /// For MATERIALIZE INDEX and PROJECTION and STATISTICS String index_name = {}; String projection_name = {}; - std::vector statistic_columns = {}; + std::vector statistics_columns = {}; + std::vector statistics_types = {}; /// For MATERIALIZE INDEX, UPDATE and DELETE. ASTPtr partition = {}; diff --git a/src/Storages/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollectionsHelpers.cpp index 47b69d79ad8..ba90f21c907 100644 --- a/src/Storages/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollectionsHelpers.cpp @@ -95,7 +95,7 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (asts.empty()) return nullptr; - NamedCollectionUtils::loadIfNot(); + NamedCollectionFactory::instance().loadIfNot(); auto collection_name = getCollectionName(asts); if (!collection_name.has_value()) diff --git a/src/Storages/NamedCollectionsHelpers.h b/src/Storages/NamedCollectionsHelpers.h index a1909f514ea..b4aea096c59 100644 --- a/src/Storages/NamedCollectionsHelpers.h +++ b/src/Storages/NamedCollectionsHelpers.h @@ -158,7 +158,7 @@ struct fmt::formatter> } template - auto format(const DB::NamedCollectionValidateKey & elem, FormatContext & context) + auto format(const DB::NamedCollectionValidateKey & elem, FormatContext & context) const { return fmt::format_to(context.out(), "{}", elem.value); } diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp index ada3e2e9323..f763a997bfb 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -179,6 +180,7 @@ 
AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool a } std::unique_ptr blob_service_client; + size_t pos = connection_url.find('?'); std::shared_ptr managed_identity_credential; if (storage_shared_key_credential) { @@ -186,12 +188,20 @@ AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool a } else { - managed_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(connection_url, managed_identity_credential); + /// If conneciton_url does not have '?', then its not SAS + if (pos == std::string::npos) + { + auto workload_identity_credential = std::make_shared(); + blob_service_client = std::make_unique(connection_url, workload_identity_credential); + } + else + { + managed_identity_credential = std::make_shared(); + blob_service_client = std::make_unique(connection_url, managed_identity_credential); + } } std::string final_url; - size_t pos = connection_url.find('?'); if (pos != std::string::npos) { auto url_without_sas = connection_url.substr(0, pos); @@ -216,7 +226,16 @@ AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool a if (storage_shared_key_credential) result = std::make_unique(final_url, storage_shared_key_credential); else - result = std::make_unique(final_url, managed_identity_credential); + { + /// If conneciton_url does not have '?', then its not SAS + if (pos == std::string::npos) + { + auto workload_identity_credential = std::make_shared(); + result = std::make_unique(final_url, workload_identity_credential); + } + else + result = std::make_unique(final_url, managed_identity_credential); + } } else { @@ -236,7 +255,16 @@ AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool a if (storage_shared_key_credential) result = std::make_unique(final_url, storage_shared_key_credential); else - result = std::make_unique(final_url, managed_identity_credential); + { + /// If conneciton_url does not have '?', then its not SAS + if 
(pos == std::string::npos) + { + auto workload_identity_credential = std::make_shared(); + result = std::make_unique(final_url, workload_identity_credential); + } + else + result = std::make_unique(final_url, managed_identity_credential); + } } else { @@ -249,7 +277,7 @@ AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool a return result; } -void StorageAzureConfiguration::fromNamedCollection(const NamedCollection & collection) + void StorageAzureConfiguration::fromNamedCollection(const NamedCollection & collection, ContextPtr) { validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h index 35b19079ca9..bbaa82c51ba 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -51,7 +51,7 @@ public: ContextPtr context) override; protected: - void fromNamedCollection(const NamedCollection & collection) override; + void fromNamedCollection(const NamedCollection & collection, ContextPtr context) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; using AzureClient = Azure::Storage::Blobs::BlobContainerClient; diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 38bf3112ee2..bc64ef15cf1 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -4,19 +4,41 @@ #include #if USE_AWS_S3 && USE_PARQUET -#include + +#include +#include +#include +#include + +#include +#include +#include + #include #include #include -#include -#include -#include -#include -#include -#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include +#include #include -#include +#include 
+#include +#include + +namespace fs = std::filesystem; namespace DB { @@ -25,10 +47,14 @@ namespace ErrorCodes { extern const int INCORRECT_DATA; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } -struct DeltaLakeMetadata::Impl +struct DeltaLakeMetadataImpl { + using ConfigurationPtr = DeltaLakeMetadata::ConfigurationPtr; + ObjectStoragePtr object_storage; ConfigurationPtr configuration; ContextPtr context; @@ -37,7 +63,7 @@ struct DeltaLakeMetadata::Impl * Useful links: * - https://github.com/delta-io/delta/blob/master/PROTOCOL.md#data-files */ - Impl(ObjectStoragePtr object_storage_, + DeltaLakeMetadataImpl(ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, ContextPtr context_) : object_storage(object_storage_) @@ -74,9 +100,17 @@ struct DeltaLakeMetadata::Impl * An action changes one aspect of the table's state, for example, adding or removing a file. * Note: it is not a valid json, but a list of json's, so we read it in a while cycle. 
*/ - std::set processMetadataFiles() + struct DeltaLakeMetadata + { + NamesAndTypesList schema; + Strings data_files; + DataLakePartitionColumns partition_columns; + }; + DeltaLakeMetadata processMetadataFiles() { std::set result_files; + NamesAndTypesList current_schema; + DataLakePartitionColumns current_partition_columns; const auto checkpoint_version = getCheckpointIfExists(result_files); if (checkpoint_version) @@ -90,7 +124,7 @@ struct DeltaLakeMetadata::Impl if (!object_storage->exists(StoredObject(file_path))) break; - processMetadataFile(file_path, result_files); + processMetadataFile(file_path, current_schema, current_partition_columns, result_files); } LOG_TRACE( @@ -101,10 +135,10 @@ struct DeltaLakeMetadata::Impl { const auto keys = listFiles(*object_storage, *configuration, deltalake_metadata_directory, metadata_file_suffix); for (const String & key : keys) - processMetadataFile(key, result_files); + processMetadataFile(key, current_schema, current_partition_columns, result_files); } - return result_files; + return DeltaLakeMetadata{current_schema, Strings(result_files.begin(), result_files.end()), current_partition_columns}; } /** @@ -136,10 +170,20 @@ struct DeltaLakeMetadata::Impl * \"nullCount\":{\"col-6c990940-59bb-4709-8f2e-17083a82c01a\":0,\"col-763cd7e2-7627-4d8e-9fb7-9e85d0c8845b\":0}}"}} * " */ - void processMetadataFile(const String & key, std::set & result) const + + /// Read metadata file and fill `file_schema`, `file_parition_columns`, `result`. + /// `result` is a list of data files. + /// `file_schema` is a common schema for all files. + /// Schema evolution is not supported, so we check that all files have the same schema. + /// `file_partiion_columns` is information about partition columns of data files. 
+ void processMetadataFile( + const String & metadata_file_path, + NamesAndTypesList & file_schema, + DataLakePartitionColumns & file_partition_columns, + std::set & result) { auto read_settings = context->getReadSettings(); - auto buf = object_storage->readObject(StoredObject(key), read_settings); + auto buf = object_storage->readObject(StoredObject(metadata_file_path), read_settings); char c; while (!buf->eof()) @@ -157,20 +201,239 @@ struct DeltaLakeMetadata::Impl if (json_str.empty()) continue; - const JSON json(json_str); - if (json.has("add")) + Poco::JSON::Parser parser; + Poco::Dynamic::Var json = parser.parse(json_str); + Poco::JSON::Object::Ptr object = json.extract(); + + // std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + // object->stringify(oss); + // LOG_TEST(log, "Metadata: {}", oss.str()); + + if (object->has("add")) { - const auto path = json["add"]["path"].getString(); - result.insert(std::filesystem::path(configuration->getPath()) / path); + auto add_object = object->get("add").extract(); + auto path = add_object->getValue("path"); + result.insert(fs::path(configuration->getPath()) / path); + + auto filename = fs::path(path).filename().string(); + auto it = file_partition_columns.find(filename); + if (it == file_partition_columns.end()) + { + if (add_object->has("partitionValues")) + { + auto partition_values = add_object->get("partitionValues").extract(); + if (partition_values->size()) + { + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & partition_name : partition_values->getNames()) + { + const auto value = partition_values->getValue(partition_name); + auto name_and_type = file_schema.tryGetByName(partition_name); + if (!name_and_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); + + auto field = getFieldValue(value, name_and_type->type); + current_partition_columns.emplace_back(*name_and_type, field); + + LOG_TEST(log, "Partition {} 
value is {} (for {})", partition_name, value, filename); + } + } + } + } } - else if (json.has("remove")) + else if (object->has("remove")) { - const auto path = json["remove"]["path"].getString(); - result.erase(std::filesystem::path(configuration->getPath()) / path); + auto path = object->get("remove").extract()->getValue("path"); + result.erase(fs::path(configuration->getPath()) / path); + } + if (object->has("metaData")) + { + const auto metadata_object = object->get("metaData").extract(); + const auto schema_object = metadata_object->getValue("schemaString"); + + Poco::JSON::Parser p; + Poco::Dynamic::Var fields_json = parser.parse(schema_object); + Poco::JSON::Object::Ptr fields_object = fields_json.extract(); + + const auto fields = fields_object->get("fields").extract(); + NamesAndTypesList current_schema; + for (size_t i = 0; i < fields->size(); ++i) + { + const auto field = fields->getObject(static_cast(i)); + auto column_name = field->getValue("name"); + auto type = field->getValue("type"); + auto is_nullable = field->getValue("nullable"); + + std::string physical_name; + auto schema_metadata_object = field->get("metadata").extract(); + if (schema_metadata_object->has("delta.columnMapping.physicalName")) + physical_name = schema_metadata_object->getValue("delta.columnMapping.physicalName"); + else + physical_name = column_name; + + LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}", + column_name, type, is_nullable, physical_name); + + current_schema.push_back({physical_name, getFieldType(field, "type", is_nullable)}); + } + + if (file_schema.empty()) + { + file_schema = current_schema; + } + else if (file_schema != current_schema) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from files with different schema is not possible " + "({} is different from {})", + file_schema.toString(), current_schema.toString()); + } } } } + DataTypePtr getFieldType(const Poco::JSON::Object::Ptr & field, const String & type_key, bool 
is_nullable) + { + if (field->isObject(type_key)) + return getComplexTypeFromObject(field->getObject(type_key)); + + auto type = field->get(type_key); + if (type.isString()) + { + const String & type_name = type.extract(); + auto data_type = getSimpleTypeByName(type_name); + return is_nullable ? makeNullable(data_type) : data_type; + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected 'type' field: {}", type.toString()); + } + + Field getFieldValue(const String & value, DataTypePtr data_type) + { + DataTypePtr check_type; + if (data_type->isNullable()) + check_type = static_cast(data_type.get())->getNestedType(); + else + check_type = data_type; + + WhichDataType which(check_type->getTypeId()); + if (which.isStringOrFixedString()) + return value; + else if (which.isInt8()) + return parse(value); + else if (which.isUInt8()) + return parse(value); + else if (which.isInt16()) + return parse(value); + else if (which.isUInt16()) + return parse(value); + else if (which.isInt32()) + return parse(value); + else if (which.isUInt32()) + return parse(value); + else if (which.isInt64()) + return parse(value); + else if (which.isUInt64()) + return parse(value); + else if (which.isFloat32()) + return parse(value); + else if (which.isFloat64()) + return parse(value); + else if (which.isDate()) + return UInt16{LocalDate{std::string(value)}.getDayNum()}; + else if (which.isDate32()) + return Int32{LocalDate{std::string(value)}.getExtenedDayNum()}; + else if (which.isDateTime64()) + { + ReadBufferFromString in(value); + DateTime64 time = 0; + readDateTime64Text(time, 6, in, assert_cast(data_type.get())->getTimeZone()); + return time; + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type for {}", check_type->getColumnType()); + } + + DataTypePtr getSimpleTypeByName(const String & type_name) + { + /// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#primitive-types + + if (type_name == "string" || type_name == "binary") + return 
std::make_shared(); + if (type_name == "long") + return std::make_shared(); + if (type_name == "integer") + return std::make_shared(); + if (type_name == "short") + return std::make_shared(); + if (type_name == "byte") + return std::make_shared(); + if (type_name == "float") + return std::make_shared(); + if (type_name == "double") + return std::make_shared(); + if (type_name == "boolean") + return DataTypeFactory::instance().get("Bool"); + if (type_name == "date") + return std::make_shared(); + if (type_name == "timestamp") + return std::make_shared(6); + if (type_name.starts_with("decimal(") && type_name.ends_with(')')) + { + ReadBufferFromString buf(std::string_view(type_name.begin() + 8, type_name.end() - 1)); + size_t precision; + size_t scale; + readIntText(precision, buf); + skipWhitespaceIfAny(buf); + assertChar(',', buf); + skipWhitespaceIfAny(buf); + tryReadIntText(scale, buf); + return createDecimal(precision, scale); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type: {}", type_name); + } + + DataTypePtr getComplexTypeFromObject(const Poco::JSON::Object::Ptr & type) + { + String type_name = type->getValue("type"); + + if (type_name == "struct") + { + DataTypes element_types; + Names element_names; + auto fields = type->get("fields").extract(); + element_types.reserve(fields->size()); + element_names.reserve(fields->size()); + for (size_t i = 0; i != fields->size(); ++i) + { + auto field = fields->getObject(static_cast(i)); + element_names.push_back(field->getValue("name")); + auto required = field->getValue("required"); + element_types.push_back(getFieldType(field, "type", required)); + } + + return std::make_shared(element_types, element_names); + } + + if (type_name == "array") + { + bool is_nullable = type->getValue("containsNull"); + auto element_type = getFieldType(type, "elementType", is_nullable); + return std::make_shared(element_type); + } + + if (type_name == "map") + { + bool is_nullable = 
type->getValue("containsNull"); + auto key_type = getFieldType(type, "keyType", /* is_nullable */false); + auto value_type = getFieldType(type, "valueType", is_nullable); + return std::make_shared(key_type, value_type); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported DeltaLake type: {}", type_name); + } + /** * Checkpoints in delta-lake are created each 10 commits by default. * Latest checkpoint is written in _last_checkpoint file: _delta_log/_last_checkpoint @@ -277,8 +540,8 @@ struct DeltaLakeMetadata::Impl ArrowMemoryPool::instance(), &reader)); - std::shared_ptr schema; - THROW_ARROW_NOT_OK(reader->GetSchema(&schema)); + std::shared_ptr file_schema; + THROW_ARROW_NOT_OK(reader->GetSchema(&file_schema)); ArrowColumnToCHColumn column_reader( header, "Parquet", @@ -325,18 +588,15 @@ DeltaLakeMetadata::DeltaLakeMetadata( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, ContextPtr context_) - : impl(std::make_unique(object_storage_, configuration_, context_)) { -} + auto impl = DeltaLakeMetadataImpl(object_storage_, configuration_, context_); + auto result = impl.processMetadataFiles(); + data_files = result.data_files; + schema = result.schema; + partition_columns = result.partition_columns; -Strings DeltaLakeMetadata::getDataFiles() const -{ - if (!data_files.empty()) - return data_files; - - auto result = impl->processMetadataFiles(); - data_files = Strings(result.begin(), result.end()); - return data_files; + LOG_TRACE(impl.log, "Found {} data files, {} partition files, schema: {}", + data_files.size(), partition_columns.size(), schema.toString()); } } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h index e527721b29e..a479a3dd293 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -20,9 +20,13 @@ public: ConfigurationPtr configuration_, ContextPtr context_); - 
Strings getDataFiles() const override; + Strings getDataFiles() const override { return data_files; } - NamesAndTypesList getTableSchema() const override { return {}; } + NamesAndTypesList getTableSchema() const override { return schema; } + + const DataLakePartitionColumns & getPartitionColumns() const override { return partition_columns; } + + const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } bool operator ==(const IDataLakeMetadata & other) const override { @@ -41,9 +45,10 @@ public: } private: - struct Impl; - const std::shared_ptr impl; mutable Strings data_files; + NamesAndTypesList schema; + std::unordered_map column_name_to_physical_name; + DataLakePartitionColumns partition_columns; }; } diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h index 3ab274b1fbf..b060b1b0d39 100644 --- a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h @@ -26,6 +26,10 @@ public: NamesAndTypesList getTableSchema() const override { return {}; } + const DataLakePartitionColumns & getPartitionColumns() const override { return partition_columns; } + + const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } + bool operator ==(const IDataLakeMetadata & other) const override { const auto * hudi_metadata = dynamic_cast(&other); @@ -46,6 +50,8 @@ private: const ObjectStoragePtr object_storage; const ConfigurationPtr configuration; mutable Strings data_files; + std::unordered_map column_name_to_physical_name; + DataLakePartitionColumns partition_columns; Strings getDataFilesImpl() const; }; diff --git a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h index a2bd5adb947..2954d50db91 100644 --- a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h +++ 
b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h @@ -2,6 +2,7 @@ #include #include #include +#include "PartitionColumns.h" namespace DB { @@ -13,6 +14,8 @@ public: virtual Strings getDataFiles() const = 0; virtual NamesAndTypesList getTableSchema() const = 0; virtual bool operator==(const IDataLakeMetadata & other) const = 0; + virtual const DataLakePartitionColumns & getPartitionColumns() const = 0; + virtual const std::unordered_map & getColumnNameToPhysicalNameMapping() const = 0; }; using DataLakeMetadataPtr = std::unique_ptr; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 83865c47eb8..f1217bc9729 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -81,7 +81,7 @@ public: auto metadata = DataLakeMetadata::create(object_storage_, base_configuration, local_context); auto schema_from_metadata = metadata->getTableSchema(); - if (schema_from_metadata != NamesAndTypesList{}) + if (!schema_from_metadata.empty()) { return ColumnsDescription(std::move(schema_from_metadata)); } @@ -99,13 +99,13 @@ public: Storage::updateConfiguration(local_context); auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); - if (current_metadata && *current_metadata == *new_metadata) return; current_metadata = std::move(new_metadata); auto updated_configuration = base_configuration->clone(); updated_configuration->setPaths(current_metadata->getDataFiles()); + updated_configuration->setPartitionColumns(current_metadata->getPartitionColumns()); Storage::configuration = updated_configuration; } @@ -123,11 +123,42 @@ public: { base_configuration->format = Storage::configuration->format; } + + if (current_metadata) + { + const auto & columns = current_metadata->getPartitionColumns(); + base_configuration->setPartitionColumns(columns); + 
Storage::configuration->setPartitionColumns(columns); + } } private: ConfigurationPtr base_configuration; DataLakeMetadataPtr current_metadata; + + ReadFromFormatInfo prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr local_context) override + { + auto info = DB::prepareReadingFromFormat(requested_columns, storage_snapshot, supports_subset_of_columns); + if (!current_metadata) + { + Storage::updateConfiguration(local_context); + current_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); + } + auto column_mapping = current_metadata->getColumnNameToPhysicalNameMapping(); + if (!column_mapping.empty()) + { + for (const auto & [column_name, physical_name] : column_mapping) + { + auto & column = info.format_header.getByName(column_name); + column.name = physical_name; + } + } + return info; + } }; using StorageIceberg = IStorageDataLake; diff --git a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h index 06dbd373bf9..9476ac6e7d9 100644 --- a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h @@ -82,6 +82,10 @@ public: /// Get table schema parsed from metadata. 
NamesAndTypesList getTableSchema() const override { return schema; } + const std::unordered_map & getColumnNameToPhysicalNameMapping() const override { return column_name_to_physical_name; } + + const DataLakePartitionColumns & getPartitionColumns() const override { return partition_columns; } + bool operator ==(const IDataLakeMetadata & other) const override { const auto * iceberg_metadata = dynamic_cast(&other); @@ -104,6 +108,8 @@ private: Int32 current_schema_id; NamesAndTypesList schema; mutable Strings data_files; + std::unordered_map column_name_to_physical_name; + DataLakePartitionColumns partition_columns; LoggerPtr log; }; diff --git a/src/Storages/ObjectStorage/DataLakes/PartitionColumns.h b/src/Storages/ObjectStorage/DataLakes/PartitionColumns.h new file mode 100644 index 00000000000..eb605559145 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/PartitionColumns.h @@ -0,0 +1,19 @@ +#pragma once +#include +#include + +namespace DB +{ + +struct DataLakePartitionColumn +{ + NameAndTypePair name_and_type; + Field value; + + bool operator ==(const DataLakePartitionColumn & other) const = default; +}; + +/// Data file -> partition columns +using DataLakePartitionColumns = std::unordered_map>; + +} diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index a8a9ab5b557..155f51adf61 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -119,7 +119,7 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool wit setURL(url_str); } -void StorageHDFSConfiguration::fromNamedCollection(const NamedCollection & collection) +void StorageHDFSConfiguration::fromNamedCollection(const NamedCollection & collection, ContextPtr) { std::string url_str; diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 01a8b9c5e3b..04884542908 100644 --- 
a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -46,7 +46,7 @@ public: ContextPtr context) override; private: - void fromNamedCollection(const NamedCollection &) override; + void fromNamedCollection(const NamedCollection &, ContextPtr context) override; void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override; void setURL(const std::string & url_); diff --git a/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp index 2c14b38ce01..e2e7f238a5e 100644 --- a/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp @@ -132,11 +132,12 @@ void WriteBufferFromHDFS::sync() } -void WriteBufferFromHDFS::finalizeImpl() +WriteBufferFromHDFS::~WriteBufferFromHDFS() { try { - next(); + if (!canceled) + finalize(); } catch (...) { @@ -144,11 +145,5 @@ void WriteBufferFromHDFS::finalizeImpl() } } - -WriteBufferFromHDFS::~WriteBufferFromHDFS() -{ - finalize(); -} - } #endif diff --git a/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h index 71e6e55addc..e3f0ae96a8f 100644 --- a/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h +++ b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h @@ -38,8 +38,6 @@ public: std::string getFileName() const override { return filename; } private: - void finalizeImpl() override; - struct WriteBufferFromHDFSImpl; std::unique_ptr impl; const std::string filename; diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 4b217b94730..b33d55105e9 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -106,15 +106,18 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, const auto & config = context->getConfigRef(); const auto & settings = 
context->getSettingsRef(); - const std::string config_prefix = "s3."; - auto s3_settings = getSettings(config, config_prefix, context, settings.s3_validate_request_settings); + auto s3_settings = getSettings( + config, "s3"/* config_prefix */, context, url.uri_str, settings.s3_validate_request_settings); - request_settings.updateFromSettingsIfChanged(settings); - auth_settings.updateFrom(s3_settings->auth_settings); + if (auto endpoint_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName())) + { + s3_settings->auth_settings.updateIfChanged(endpoint_settings->auth_settings); + s3_settings->request_settings.updateIfChanged(endpoint_settings->request_settings); + } - s3_settings->auth_settings = auth_settings; - s3_settings->request_settings = request_settings; + s3_settings->auth_settings.updateIfChanged(auth_settings); + s3_settings->request_settings.updateIfChanged(request_settings); if (!headers_from_ast.empty()) { @@ -123,10 +126,7 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, headers_from_ast.begin(), headers_from_ast.end()); } - if (auto endpoint_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName())) - s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); - - auto client = getClient(config, config_prefix, context, *s3_settings, false, &url); + auto client = getClient(url, *s3_settings, context, /* for_disk_s3 */false); auto key_generator = createObjectStorageKeysGeneratorAsIsWithPrefix(url.key); auto s3_capabilities = S3Capabilities { @@ -139,8 +139,9 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, key_generator, "StorageS3", false); } -void StorageS3Configuration::fromNamedCollection(const NamedCollection & collection) +void StorageS3Configuration::fromNamedCollection(const NamedCollection & collection, ContextPtr context) { + const auto settings = context->getSettingsRef(); 
validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); auto filename = collection.getOrDefault("filename", ""); @@ -159,9 +160,9 @@ void StorageS3Configuration::fromNamedCollection(const NamedCollection & collect compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); structure = collection.getOrDefault("structure", "auto"); - request_settings = S3Settings::RequestSettings(collection); + request_settings = S3::RequestSettings(collection, settings, /* validate_settings */true); - static_configuration = !auth_settings.access_key_id.empty() || auth_settings.no_sign_request.has_value(); + static_configuration = !auth_settings.access_key_id.value.empty() || auth_settings.no_sign_request.changed; keys = {url.key}; } @@ -357,7 +358,7 @@ void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_ if (no_sign_request) auth_settings.no_sign_request = no_sign_request; - static_configuration = !auth_settings.access_key_id.empty() || auth_settings.no_sign_request.has_value(); + static_configuration = !auth_settings.access_key_id.value.empty() || auth_settings.no_sign_request.changed; auth_settings.no_sign_request = no_sign_request; keys = {url.key}; diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index 906d10a1a9a..39a646c7df2 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -3,7 +3,7 @@ #include "config.h" #if USE_AWS_S3 -#include +#include #include namespace DB @@ -51,14 +51,14 @@ public: ContextPtr context) override; private: - void fromNamedCollection(const NamedCollection & collection) override; + void fromNamedCollection(const NamedCollection & collection, ContextPtr context) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; S3::URI url; std::vector keys; S3::AuthSettings auth_settings; - 
S3Settings::RequestSettings request_settings; + S3::RequestSettings request_settings; HTTPHeaderEntries headers_from_ast; /// Headers from ast is a part of static configuration. /// If s3 configuration was passed from ast, then it is static. /// If from config - it can be changed with config reload. diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 2c8e60b49d0..683473006e3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -203,6 +203,15 @@ private: }; } +ReadFromFormatInfo StorageObjectStorage::prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr /* local_context */) +{ + return DB::prepareReadingFromFormat(requested_columns, storage_snapshot, supports_subset_of_columns); +} + void StorageObjectStorage::read( QueryPlan & query_plan, const Names & column_names, @@ -222,7 +231,7 @@ void StorageObjectStorage::read( } const auto read_from_format_info = prepareReadingFromFormat( - column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); + column_names, storage_snapshot, supportsSubsetOfColumns(local_context), local_context); const bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; @@ -424,7 +433,7 @@ void StorageObjectStorage::Configuration::initialize( bool with_table_structure) { if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - configuration.fromNamedCollection(*named_collection); + configuration.fromNamedCollection(*named_collection, local_context); else configuration.fromAST(engine_args, local_context, with_table_structure); @@ -451,6 +460,7 @@ StorageObjectStorage::Configuration::Configuration(const Configuration & other) format = 
other.format; compression_method = other.compression_method; structure = other.structure; + partition_columns = other.partition_columns; } bool StorageObjectStorage::Configuration::withPartitionWildcard() const diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index f45d8c1f01a..c93a0bf6943 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { @@ -117,6 +118,12 @@ public: protected: virtual void updateConfiguration(ContextPtr local_context); + virtual ReadFromFormatInfo prepareReadingFromFormat( + const Strings & requested_columns, + const StorageSnapshotPtr & storage_snapshot, + bool supports_subset_of_columns, + ContextPtr local_context); + static std::unique_ptr createReadBufferIterator( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, @@ -188,17 +195,21 @@ public: virtual ConfigurationPtr clone() = 0; virtual bool isStaticConfiguration() const { return true; } + void setPartitionColumns(const DataLakePartitionColumns & columns) { partition_columns = columns; } + const DataLakePartitionColumns & getPartitionColumns() const { return partition_columns; } + String format = "auto"; String compression_method = "auto"; String structure = "auto"; protected: - virtual void fromNamedCollection(const NamedCollection & collection) = 0; + virtual void fromNamedCollection(const NamedCollection & collection, ContextPtr context) = 0; virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0; void assertInitialized() const; bool initialized = false; + DataLakePartitionColumns partition_columns; }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 0a3cf19a590..f2f6eac333c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ 
b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -50,55 +50,58 @@ void StorageObjectStorageSink::consume(Chunk chunk) void StorageObjectStorageSink::onCancel() { std::lock_guard lock(cancel_mutex); - finalize(); + cancelBuffers(); + releaseBuffers(); cancelled = true; } -void StorageObjectStorageSink::onException(std::exception_ptr exception) +void StorageObjectStorageSink::onException(std::exception_ptr) { std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization. - release(); - } + cancelBuffers(); + releaseBuffers(); } void StorageObjectStorageSink::onFinish() { std::lock_guard lock(cancel_mutex); - finalize(); + finalizeBuffers(); } -void StorageObjectStorageSink::finalize() +void StorageObjectStorageSink::finalizeBuffers() { if (!writer) return; try { - writer->finalize(); writer->flush(); - write_buf->finalize(); + writer->finalize(); } catch (...) { /// Stop ParallelFormattingOutputFormat correctly. 
- release(); + releaseBuffers(); throw; } + + write_buf->finalize(); } -void StorageObjectStorageSink::release() +void StorageObjectStorageSink::releaseBuffers() { writer.reset(); write_buf.reset(); } +void StorageObjectStorageSink::cancelBuffers() +{ + if (writer) + writer->cancel(); + if (write_buf) + write_buf->cancel(); +} + PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 45cf83d606f..e0081193686 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -35,8 +35,9 @@ private: bool cancelled = false; std::mutex cancel_mutex; - void finalize(); - void release(); + void finalizeBuffers(); + void releaseBuffers(); + void cancelBuffers(); }; class PartitionedStorageObjectStorageSink : public PartitionedSink diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index b31d0f8a92e..3c1c2f9bba1 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -193,19 +193,44 @@ Chunk StorageObjectStorageSource::generate() progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); const auto & object_info = reader.getObjectInfo(); - const auto & filename = object_info.getFileName(); - chassert(object_info.metadata); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( + const auto & filename = object_info->getFileName(); + chassert(object_info->metadata); + VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, read_from_format_info.requested_virtual_columns, - getUniqueStoragePathIdentifier(*configuration, reader.getObjectInfo(), false), - object_info.metadata->size_bytes, &filename); + {.path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), + .size = object_info->isArchive() ? object_info->fileSizeInArchive() : object_info->metadata->size_bytes, + .filename = &filename, + .last_modified = object_info->metadata->last_modified}); + const auto & partition_columns = configuration->getPartitionColumns(); + if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) + { + auto partition_values = partition_columns.find(filename); + + for (const auto & [name_and_type, value] : partition_values->second) + { + if (!read_from_format_info.source_header.has(name_and_type.name)) + continue; + + const auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); + auto partition_column = name_and_type.type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); + + /// This column is filled with default value now, remove it. + chunk.erase(column_pos); + + /// Add correct values. 
+ if (chunk.hasColumns()) + chunk.addColumn(column_pos, std::move(partition_column)); + else + chunk.addColumn(std::move(partition_column)); + } + } return chunk; } if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getObjectInfo(), total_rows_in_file); + addNumRowsToCache(*reader.getObjectInfo(), total_rows_in_file); total_rows_in_file = 0; @@ -515,24 +540,22 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne else ++it; } + + if (filter_dag) + { + std::vector paths; + paths.reserve(new_batch.size()); + for (const auto & object_info : new_batch) + paths.push_back(getUniqueStoragePathIdentifier(*configuration, *object_info, false)); + + VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); + + LOG_TEST(logger, "Filtered files: {} -> {}", paths.size(), new_batch.size()); + } } index = 0; - if (filter_dag) - { - std::vector paths; - paths.reserve(new_batch.size()); - for (const auto & object_info : new_batch) - { - chassert(object_info); - paths.push_back(getUniqueStoragePathIdentifier(*configuration, *object_info, false)); - } - - VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); - LOG_TEST(logger, "Filtered files: {} -> {}", paths.size(), new_batch.size()); - } - if (read_keys) read_keys->insert(read_keys->end(), new_batch.begin(), new_batch.end()); @@ -549,7 +572,12 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne } if (index >= object_infos.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Index out of bound for blob metadata. 
Index: {}, size: {}", + index, object_infos.size()); + } return object_infos[index++]; } @@ -685,10 +713,9 @@ static IArchiveReader::NameFilter createArchivePathFilter(const std::string & ar StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive::ObjectInfoInArchive( ObjectInfoPtr archive_object_, const std::string & path_in_archive_, - std::shared_ptr archive_reader_) - : archive_object(archive_object_) - , path_in_archive(path_in_archive_) - , archive_reader(archive_reader_) + std::shared_ptr archive_reader_, + IArchiveReader::FileInfo && file_info_) + : archive_object(archive_object_), path_in_archive(path_in_archive_), archive_reader(archive_reader_), file_info(file_info_) { } @@ -727,6 +754,7 @@ StorageObjectStorageSource::ObjectInfoPtr StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) { std::unique_lock lock{next_mutex}; + IArchiveReader::FileInfo current_file_info{}; while (true) { if (filter) @@ -751,6 +779,8 @@ StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) path_in_archive = file_enumerator->getFileName(); if (!filter(path_in_archive)) continue; + else + current_file_info = file_enumerator->getFileInfo(); } else { @@ -764,15 +794,19 @@ StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) archive_reader = createArchiveReader(archive_object); if (!archive_reader->fileExists(path_in_archive)) continue; + else + current_file_info = archive_reader->getFileInfo(path_in_archive); } - - auto object_in_archive = std::make_shared(archive_object, path_in_archive, archive_reader); - - if (read_keys != nullptr) - read_keys->push_back(object_in_archive); - - return object_in_archive; + break; } + + auto object_in_archive + = std::make_shared(archive_object, path_in_archive, archive_reader, std::move(current_file_info)); + + if (read_keys != nullptr) + read_keys->push_back(object_in_archive); + + return object_in_archive; } size_t StorageObjectStorageSource::ArchiveIterator::estimatedKeysCount() 
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index fd7c7aa7102..a9e28b93b5c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -15,7 +16,7 @@ class SchemaCache; class StorageObjectStorageSource : public SourceWithKeyCondition, WithContext { - friend class StorageS3QueueSource; + friend class ObjectStorageQueueSource; public: using Configuration = StorageObjectStorage::Configuration; using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; @@ -100,7 +101,7 @@ protected: PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - const ObjectInfo & getObjectInfo() const { return *object_info; } + ObjectInfoPtr getObjectInfo() const { return object_info; } const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } private: @@ -259,7 +260,8 @@ public: ObjectInfoInArchive( ObjectInfoPtr archive_object_, const std::string & path_in_archive_, - std::shared_ptr archive_reader_); + std::shared_ptr archive_reader_, + IArchiveReader::FileInfo && file_info_); std::string getFileName() const override { @@ -278,9 +280,12 @@ public: bool isArchive() const override { return true; } + size_t fileSizeInArchive() const override { return file_info.uncompressed_size; } + const ObjectInfoPtr archive_object; const std::string path_in_archive; const std::shared_ptr archive_reader; + const IArchiveReader::FileInfo file_info; }; private: diff --git a/src/Storages/S3Queue/S3QueueIFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp similarity index 73% rename from src/Storages/S3Queue/S3QueueIFileMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp index 6c4089115d4..52ee0c9f8ed 
100644 --- a/src/Storages/S3Queue/S3QueueIFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -11,8 +11,8 @@ namespace ProfileEvents { - extern const Event S3QueueProcessedFiles; - extern const Event S3QueueFailedFiles; + extern const Event ObjectStorageQueueProcessedFiles; + extern const Event ObjectStorageQueueFailedFiles; }; namespace DB @@ -35,33 +35,40 @@ namespace } } -void S3QueueIFileMetadata::FileStatus::onProcessing() +void ObjectStorageQueueIFileMetadata::FileStatus::setProcessingEndTime() +{ + processing_end_time = now(); +} + +void ObjectStorageQueueIFileMetadata::FileStatus::onProcessing() { state = FileStatus::State::Processing; processing_start_time = now(); } -void S3QueueIFileMetadata::FileStatus::onProcessed() +void ObjectStorageQueueIFileMetadata::FileStatus::onProcessed() { state = FileStatus::State::Processed; - processing_end_time = now(); + if (!processing_end_time) + setProcessingEndTime(); } -void S3QueueIFileMetadata::FileStatus::onFailed(const std::string & exception) +void ObjectStorageQueueIFileMetadata::FileStatus::onFailed(const std::string & exception) { state = FileStatus::State::Failed; - processing_end_time = now(); + if (!processing_end_time) + setProcessingEndTime(); std::lock_guard lock(last_exception_mutex); last_exception = exception; } -std::string S3QueueIFileMetadata::FileStatus::getException() const +std::string ObjectStorageQueueIFileMetadata::FileStatus::getException() const { std::lock_guard lock(last_exception_mutex); return last_exception; } -std::string S3QueueIFileMetadata::NodeMetadata::toString() const +std::string ObjectStorageQueueIFileMetadata::NodeMetadata::toString() const { Poco::JSON::Object json; json.set("file_path", file_path); @@ -76,7 +83,7 @@ std::string S3QueueIFileMetadata::NodeMetadata::toString() const return oss.str(); } -S3QueueIFileMetadata::NodeMetadata 
S3QueueIFileMetadata::NodeMetadata::fromString(const std::string & metadata_str) +ObjectStorageQueueIFileMetadata::NodeMetadata ObjectStorageQueueIFileMetadata::NodeMetadata::fromString(const std::string & metadata_str) { Poco::JSON::Parser parser; auto json = parser.parse(metadata_str).extract(); @@ -91,7 +98,7 @@ S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::NodeMetadata::fromStrin return metadata; } -S3QueueIFileMetadata::S3QueueIFileMetadata( +ObjectStorageQueueIFileMetadata::ObjectStorageQueueIFileMetadata( const std::string & path_, const std::string & processing_node_path_, const std::string & processed_node_path_, @@ -116,11 +123,18 @@ S3QueueIFileMetadata::S3QueueIFileMetadata( processed_node_path, processing_node_path, failed_node_path); } -S3QueueIFileMetadata::~S3QueueIFileMetadata() +ObjectStorageQueueIFileMetadata::~ObjectStorageQueueIFileMetadata() { if (processing_id_version.has_value()) { - file_status->onFailed("Uncaught exception"); + if (file_status->getException().empty()) + { + if (std::current_exception()) + file_status->onFailed(getCurrentExceptionMessage(true)); + else + file_status->onFailed("Unprocessed exception"); + } + LOG_TEST(log, "Removing processing node in destructor for file: {}", path); try { @@ -148,9 +162,9 @@ S3QueueIFileMetadata::~S3QueueIFileMetadata() } } -std::string S3QueueIFileMetadata::getNodeName(const std::string & path) +std::string ObjectStorageQueueIFileMetadata::getNodeName(const std::string & path) { - /// Since with are dealing with paths in s3 which can have "/", + /// Since with are dealing with paths in object storage which can have "/", /// we cannot create a zookeeper node with the name equal to path. /// Therefore we use a hash of the path as a node name. 
@@ -159,7 +173,7 @@ std::string S3QueueIFileMetadata::getNodeName(const std::string & path) return toString(path_hash.get64()); } -S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::createNodeMetadata( +ObjectStorageQueueIFileMetadata::NodeMetadata ObjectStorageQueueIFileMetadata::createNodeMetadata( const std::string & path, const std::string & exception, size_t retries) @@ -168,9 +182,9 @@ S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::createNodeMetadata( /// Since node name is just a hash we want to know to which file it corresponds, /// so we keep "file_path" in nodes data. - /// "last_processed_timestamp" is needed for TTL metadata nodes enabled by s3queue_tracked_file_ttl_sec. - /// "last_exception" is kept for introspection, should also be visible in system.s3queue_log if it is enabled. - /// "retries" is kept for retrying the processing enabled by s3queue_loading_retries. + /// "last_processed_timestamp" is needed for TTL metadata nodes enabled by tracked_file_ttl_sec. + /// "last_exception" is kept for introspection, should also be visible in system.s3(azure)queue_log if it is enabled. + /// "retries" is kept for retrying the processing enabled by loading_retries. NodeMetadata metadata; metadata.file_path = path; metadata.last_processed_timestamp = now(); @@ -179,7 +193,7 @@ S3QueueIFileMetadata::NodeMetadata S3QueueIFileMetadata::createNodeMetadata( return metadata; } -std::string S3QueueIFileMetadata::getProcessorInfo(const std::string & processor_id) +std::string ObjectStorageQueueIFileMetadata::getProcessorInfo(const std::string & processor_id) { /// Add information which will be useful for debugging just in case. 
Poco::JSON::Object json; @@ -192,7 +206,7 @@ std::string S3QueueIFileMetadata::getProcessorInfo(const std::string & processor return oss.str(); } -bool S3QueueIFileMetadata::setProcessing() +bool ObjectStorageQueueIFileMetadata::setProcessing() { auto state = file_status->state.load(); if (state == FileStatus::State::Processing @@ -221,13 +235,22 @@ bool S3QueueIFileMetadata::setProcessing() return success; } -void S3QueueIFileMetadata::setProcessed() +void ObjectStorageQueueIFileMetadata::setProcessed() { LOG_TRACE(log, "Setting file {} as processed (path: {})", path, processed_node_path); - ProfileEvents::increment(ProfileEvents::S3QueueProcessedFiles); + ProfileEvents::increment(ProfileEvents::ObjectStorageQueueProcessedFiles); file_status->onProcessed(); - setProcessedImpl(); + + try + { + setProcessedImpl(); + } + catch (...) + { + file_status->onFailed(getCurrentExceptionMessage(true)); + throw; + } processing_id.reset(); processing_id_version.reset(); @@ -235,18 +258,36 @@ void S3QueueIFileMetadata::setProcessed() LOG_TRACE(log, "Set file {} as processed (rows: {})", path, file_status->processed_rows); } -void S3QueueIFileMetadata::setFailed(const std::string & exception) +void ObjectStorageQueueIFileMetadata::setFailed(const std::string & exception_message, bool reduce_retry_count, bool overwrite_status) { - LOG_TRACE(log, "Setting file {} as failed (exception: {}, path: {})", path, exception, failed_node_path); + LOG_TRACE(log, "Setting file {} as failed (path: {}, reduce retry count: {}, exception: {})", + path, failed_node_path, reduce_retry_count, exception_message); - ProfileEvents::increment(ProfileEvents::S3QueueFailedFiles); - file_status->onFailed(exception); - node_metadata.last_exception = exception; + ProfileEvents::increment(ProfileEvents::ObjectStorageQueueFailedFiles); + if (overwrite_status || file_status->state != FileStatus::State::Failed) + file_status->onFailed(exception_message); - if (max_loading_retries == 0) - 
setFailedNonRetriable(); - else - setFailedRetriable(); + node_metadata.last_exception = exception_message; + + if (reduce_retry_count) + { + try + { + if (max_loading_retries == 0) + setFailedNonRetriable(); + else + setFailedRetriable(); + } + catch (...) + { + auto full_exception = fmt::format( + "First exception: {}, exception while setting file as failed: {}", + exception_message, getCurrentExceptionMessage(true)); + + file_status->onFailed(full_exception); + throw; + } + } processing_id.reset(); processing_id_version.reset(); @@ -254,7 +295,7 @@ void S3QueueIFileMetadata::setFailed(const std::string & exception) LOG_TRACE(log, "Set file {} as failed (rows: {})", path, file_status->processed_rows); } -void S3QueueIFileMetadata::setFailedNonRetriable() +void ObjectStorageQueueIFileMetadata::setFailedNonRetriable() { auto zk_client = getZooKeeper(); Coordination::Requests requests; @@ -285,7 +326,7 @@ void S3QueueIFileMetadata::setFailedNonRetriable() throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error while setting file as failed: {}", code); } -void S3QueueIFileMetadata::setFailedRetriable() +void ObjectStorageQueueIFileMetadata::setFailedRetriable() { /// Instead of creating a persistent /failed/node_hash node /// we create a persistent /failed/node_hash.retriable node. @@ -296,19 +337,20 @@ void S3QueueIFileMetadata::setFailedRetriable() auto zk_client = getZooKeeper(); /// Extract the number of already done retries from node_hash.retriable node if it exists. 
+ Coordination::Requests requests; Coordination::Stat stat; std::string res; - if (zk_client->tryGet(retrieable_failed_node_path, res, &stat)) + bool has_failed_before = zk_client->tryGet(retrieable_failed_node_path, res, &stat); + if (has_failed_before) { auto failed_node_metadata = NodeMetadata::fromString(res); node_metadata.retries = failed_node_metadata.retries + 1; file_status->retries = node_metadata.retries; } - LOG_TRACE(log, "File `{}` failed to process, try {}/{}", - path, node_metadata.retries, max_loading_retries); + LOG_TRACE(log, "File `{}` failed to process, try {}/{}, retries node exists: {} (failed node path: {})", + path, node_metadata.retries, max_loading_retries, has_failed_before, failed_node_path); - Coordination::Requests requests; if (node_metadata.retries >= max_loading_retries) { /// File is no longer retriable. diff --git a/src/Storages/S3Queue/S3QueueIFileMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h similarity index 88% rename from src/Storages/S3Queue/S3QueueIFileMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h index e0b0d16cbcc..f0e55c202a2 100644 --- a/src/Storages/S3Queue/S3QueueIFileMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h @@ -6,7 +6,7 @@ namespace DB { -class S3QueueIFileMetadata +class ObjectStorageQueueIFileMetadata { public: struct FileStatus @@ -19,6 +19,7 @@ public: None }; + void setProcessingEndTime(); void onProcessing(); void onProcessed(); void onFailed(const std::string & exception); @@ -33,7 +34,6 @@ public: std::atomic processing_start_time = 0; std::atomic processing_end_time = 0; std::atomic retries = 0; - ProfileEvents::Counters profile_counters; private: mutable std::mutex last_exception_mutex; @@ -41,7 +41,7 @@ public: }; using FileStatusPtr = std::shared_ptr; - explicit S3QueueIFileMetadata( + explicit ObjectStorageQueueIFileMetadata( const std::string & path_, const std::string & 
processing_node_path_, const std::string & processed_node_path_, @@ -50,17 +50,19 @@ public: size_t max_loading_retries_, LoggerPtr log_); - virtual ~S3QueueIFileMetadata(); + virtual ~ObjectStorageQueueIFileMetadata(); bool setProcessing(); void setProcessed(); - void setFailed(const std::string & exception); + void setFailed(const std::string & exception_message, bool reduce_retry_count, bool overwrite_status); virtual void setProcessedAtStartRequests( Coordination::Requests & requests, const zkutil::ZooKeeperPtr & zk_client) = 0; FileStatusPtr getFileStatus() { return file_status; } + const std::string & getPath() const { return path; } + size_t getMaxTries() const { return max_loading_retries; } struct NodeMetadata { @@ -92,7 +94,7 @@ protected: LoggerPtr log; /// processing node is ephemeral, so we cannot verify with it if - /// this node was created by a certain processor on a previous s3 queue processing stage, + /// this node was created by a certain processor on a previous processing stage, /// because we could get a session expired in between the stages /// and someone else could just create this processing node. 
/// Therefore we also create a persistent processing node diff --git a/src/Storages/S3Queue/S3QueueMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp similarity index 71% rename from src/Storages/S3Queue/S3QueueMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp index f4c8c5c5ef2..23ac92b667a 100644 --- a/src/Storages/S3Queue/S3QueueMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.cpp @@ -4,13 +4,12 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -22,13 +21,8 @@ namespace ProfileEvents { - extern const Event S3QueueSetFileProcessingMicroseconds; - extern const Event S3QueueSetFileProcessedMicroseconds; - extern const Event S3QueueSetFileFailedMicroseconds; - extern const Event S3QueueFailedFiles; - extern const Event S3QueueProcessedFiles; - extern const Event S3QueueCleanupMaxSetSizeOrTTLMicroseconds; - extern const Event S3QueueLockLocalFileStatusesMicroseconds; + extern const Event ObjectStorageQueueCleanupMaxSetSizeOrTTLMicroseconds; + extern const Event ObjectStorageQueueLockLocalFileStatusesMicroseconds; }; namespace DB @@ -63,7 +57,7 @@ namespace } } -class S3QueueMetadata::LocalFileStatuses +class ObjectStorageQueueMetadata::LocalFileStatuses { public: LocalFileStatuses() = default; @@ -109,95 +103,89 @@ private: std::unique_lock lock() const { - auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueLockLocalFileStatusesMicroseconds); + auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueueLockLocalFileStatusesMicroseconds); return std::unique_lock(mutex); } }; -S3QueueMetadata::S3QueueMetadata(const fs::path & zookeeper_path_, const S3QueueSettings & settings_) +ObjectStorageQueueMetadata::ObjectStorageQueueMetadata(const fs::path & zookeeper_path_, const 
ObjectStorageQueueSettings & settings_) : settings(settings_) , zookeeper_path(zookeeper_path_) , buckets_num(getBucketsNum(settings_)) - , log(getLogger("StorageS3Queue(" + zookeeper_path_.string() + ")")) + , log(getLogger("StorageObjectStorageQueue(" + zookeeper_path_.string() + ")")) , local_file_statuses(std::make_shared()) { - if (settings.mode == S3QueueMode::UNORDERED - && (settings.s3queue_tracked_files_limit || settings.s3queue_tracked_file_ttl_sec)) + if (settings.mode == ObjectStorageQueueMode::UNORDERED + && (settings.tracked_files_limit || settings.tracked_file_ttl_sec)) { task = Context::getGlobalContextInstance()->getSchedulePool().createTask( - "S3QueueCleanupFunc", + "ObjectStorageQueueCleanupFunc", [this] { cleanupThreadFunc(); }); task->activate(); task->scheduleAfter( generateRescheduleInterval( - settings.s3queue_cleanup_interval_min_ms, settings.s3queue_cleanup_interval_max_ms)); + settings.cleanup_interval_min_ms, settings.cleanup_interval_max_ms)); } + LOG_TRACE(log, "Mode: {}, buckets: {}, processing threads: {}, result buckets num: {}", + settings.mode.toString(), settings.buckets, settings.processing_threads_num, buckets_num); + } -S3QueueMetadata::~S3QueueMetadata() +ObjectStorageQueueMetadata::~ObjectStorageQueueMetadata() { shutdown(); } -void S3QueueMetadata::shutdown() +void ObjectStorageQueueMetadata::shutdown() { shutdown_called = true; if (task) task->deactivate(); } -void S3QueueMetadata::checkSettings(const S3QueueSettings & settings_) const +void ObjectStorageQueueMetadata::checkSettings(const ObjectStorageQueueSettings & settings_) const { - S3QueueTableMetadata::checkEquals(settings, settings_); + ObjectStorageQueueTableMetadata::checkEquals(settings, settings_); } -S3QueueMetadata::FileStatusPtr S3QueueMetadata::getFileStatus(const std::string & path) +ObjectStorageQueueMetadata::FileStatusPtr ObjectStorageQueueMetadata::getFileStatus(const std::string & path) { return local_file_statuses->get(path, /* create */false); } 
-S3QueueMetadata::FileStatuses S3QueueMetadata::getFileStatuses() const +ObjectStorageQueueMetadata::FileStatuses ObjectStorageQueueMetadata::getFileStatuses() const { return local_file_statuses->getAll(); } -S3QueueMetadata::FileMetadataPtr S3QueueMetadata::getFileMetadata( +ObjectStorageQueueMetadata::FileMetadataPtr ObjectStorageQueueMetadata::getFileMetadata( const std::string & path, - S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info) + ObjectStorageQueueOrderedFileMetadata::BucketInfoPtr bucket_info) { auto file_status = local_file_statuses->get(path, /* create */true); - switch (settings.mode) + switch (settings.mode.value) { - case S3QueueMode::ORDERED: - return std::make_shared( + case ObjectStorageQueueMode::ORDERED: + return std::make_shared( zookeeper_path, path, file_status, bucket_info, buckets_num, - settings.s3queue_loading_retries, + settings.loading_retries, log); - case S3QueueMode::UNORDERED: - return std::make_shared( + case ObjectStorageQueueMode::UNORDERED: + return std::make_shared( zookeeper_path, path, file_status, - settings.s3queue_loading_retries, + settings.loading_retries, log); } } -size_t S3QueueMetadata::getBucketsNum(const S3QueueSettings & settings) -{ - if (settings.s3queue_buckets) - return settings.s3queue_buckets; - if (settings.s3queue_processing_threads_num) - return settings.s3queue_processing_threads_num; - return 0; -} - -size_t S3QueueMetadata::getBucketsNum(const S3QueueTableMetadata & settings) +size_t ObjectStorageQueueMetadata::getBucketsNum(const ObjectStorageQueueSettings & settings) { if (settings.buckets) return settings.buckets; @@ -206,32 +194,41 @@ size_t S3QueueMetadata::getBucketsNum(const S3QueueTableMetadata & settings) return 0; } -bool S3QueueMetadata::useBucketsForProcessing() const +size_t ObjectStorageQueueMetadata::getBucketsNum(const ObjectStorageQueueTableMetadata & settings) { - return settings.mode == S3QueueMode::ORDERED && (buckets_num > 1); + if (settings.buckets) + return 
settings.buckets; + if (settings.processing_threads_num) + return settings.processing_threads_num; + return 0; } -S3QueueMetadata::Bucket S3QueueMetadata::getBucketForPath(const std::string & path) const +bool ObjectStorageQueueMetadata::useBucketsForProcessing() const { - return S3QueueOrderedFileMetadata::getBucketForPath(path, buckets_num); + return settings.mode == ObjectStorageQueueMode::ORDERED && (buckets_num > 1); } -S3QueueOrderedFileMetadata::BucketHolderPtr -S3QueueMetadata::tryAcquireBucket(const Bucket & bucket, const Processor & processor) +ObjectStorageQueueMetadata::Bucket ObjectStorageQueueMetadata::getBucketForPath(const std::string & path) const { - return S3QueueOrderedFileMetadata::tryAcquireBucket(zookeeper_path, bucket, processor); + return ObjectStorageQueueOrderedFileMetadata::getBucketForPath(path, buckets_num); } -void S3QueueMetadata::initialize( +ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr +ObjectStorageQueueMetadata::tryAcquireBucket(const Bucket & bucket, const Processor & processor) +{ + return ObjectStorageQueueOrderedFileMetadata::tryAcquireBucket(zookeeper_path, bucket, processor, log); +} + +void ObjectStorageQueueMetadata::initialize( const ConfigurationPtr & configuration, const StorageInMemoryMetadata & storage_metadata) { - const auto metadata_from_table = S3QueueTableMetadata(*configuration, settings, storage_metadata); + const auto metadata_from_table = ObjectStorageQueueTableMetadata(*configuration, settings, storage_metadata); const auto & columns_from_table = storage_metadata.getColumns(); const auto table_metadata_path = zookeeper_path / "metadata"; - const auto metadata_paths = settings.mode == S3QueueMode::ORDERED - ? S3QueueOrderedFileMetadata::getMetadataPaths(buckets_num) - : S3QueueUnorderedFileMetadata::getMetadataPaths(); + const auto metadata_paths = settings.mode == ObjectStorageQueueMode::ORDERED + ? 
ObjectStorageQueueOrderedFileMetadata::getMetadataPaths(buckets_num) + : ObjectStorageQueueUnorderedFileMetadata::getMetadataPaths(); auto zookeeper = getZooKeeper(); zookeeper->createAncestors(zookeeper_path); @@ -240,7 +237,7 @@ void S3QueueMetadata::initialize( { if (zookeeper->exists(table_metadata_path)) { - const auto metadata_from_zk = S3QueueTableMetadata::parse(zookeeper->get(fs::path(zookeeper_path) / "metadata")); + const auto metadata_from_zk = ObjectStorageQueueTableMetadata::parse(zookeeper->get(fs::path(zookeeper_path) / "metadata")); const auto columns_from_zk = ColumnsDescription::parse(metadata_from_zk.columns); metadata_from_table.checkEquals(metadata_from_zk); @@ -265,8 +262,8 @@ void S3QueueMetadata::initialize( requests.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent)); } - if (!settings.s3queue_last_processed_path.value.empty()) - getFileMetadata(settings.s3queue_last_processed_path)->setProcessedAtStartRequests(requests, zookeeper); + if (!settings.last_processed_path.value.empty()) + getFileMetadata(settings.last_processed_path)->setProcessedAtStartRequests(requests, zookeeper); Coordination::Responses responses; auto code = zookeeper->tryMulti(requests, responses); @@ -290,10 +287,10 @@ void S3QueueMetadata::initialize( "of wrong zookeeper path or because of logical error"); } -void S3QueueMetadata::cleanupThreadFunc() +void ObjectStorageQueueMetadata::cleanupThreadFunc() { /// A background task is responsible for maintaining - /// settings.s3queue_tracked_files_limit and max_set_age settings for `unordered` processing mode. + /// settings.tracked_files_limit and max_set_age settings for `unordered` processing mode. 
if (shutdown_called) return; @@ -312,12 +309,12 @@ void S3QueueMetadata::cleanupThreadFunc() task->scheduleAfter( generateRescheduleInterval( - settings.s3queue_cleanup_interval_min_ms, settings.s3queue_cleanup_interval_max_ms)); + settings.cleanup_interval_min_ms, settings.cleanup_interval_max_ms)); } -void S3QueueMetadata::cleanupThreadFuncImpl() +void ObjectStorageQueueMetadata::cleanupThreadFuncImpl() { - auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueueCleanupMaxSetSizeOrTTLMicroseconds); + auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueueCleanupMaxSetSizeOrTTLMicroseconds); const auto zk_client = getZooKeeper(); const fs::path zookeeper_processed_path = zookeeper_path / "processed"; const fs::path zookeeper_failed_path = zookeeper_path / "failed"; @@ -355,11 +352,11 @@ void S3QueueMetadata::cleanupThreadFuncImpl() return; } - chassert(settings.s3queue_tracked_files_limit || settings.s3queue_tracked_file_ttl_sec); - const bool check_nodes_limit = settings.s3queue_tracked_files_limit > 0; - const bool check_nodes_ttl = settings.s3queue_tracked_file_ttl_sec > 0; + chassert(settings.tracked_files_limit || settings.tracked_file_ttl_sec); + const bool check_nodes_limit = settings.tracked_files_limit > 0; + const bool check_nodes_ttl = settings.tracked_file_ttl_sec > 0; - const bool nodes_limit_exceeded = nodes_num > settings.s3queue_tracked_files_limit; + const bool nodes_limit_exceeded = nodes_num > settings.tracked_files_limit; if ((!nodes_limit_exceeded || !check_nodes_limit) && !check_nodes_ttl) { LOG_TEST(log, "No limit exceeded"); @@ -381,7 +378,7 @@ void S3QueueMetadata::cleanupThreadFuncImpl() struct Node { std::string zk_path; - S3QueueIFileMetadata::NodeMetadata metadata; + ObjectStorageQueueIFileMetadata::NodeMetadata metadata; }; auto node_cmp = [](const Node & a, const Node & b) { @@ -402,7 +399,7 @@ void S3QueueMetadata::cleanupThreadFuncImpl() std::string metadata_str; if 
(zk_client->tryGet(path, metadata_str)) { - sorted_nodes.emplace(path, S3QueueIFileMetadata::NodeMetadata::fromString(metadata_str)); + sorted_nodes.emplace(path, ObjectStorageQueueIFileMetadata::NodeMetadata::fromString(metadata_str)); LOG_TEST(log, "Fetched metadata for node {}", path); } else @@ -432,9 +429,9 @@ void S3QueueMetadata::cleanupThreadFuncImpl() wb << fmt::format("Node: {}, path: {}, timestamp: {};\n", node, metadata.file_path, metadata.last_processed_timestamp); return wb.str(); }; - LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", settings.s3queue_tracked_files_limit, settings.s3queue_tracked_file_ttl_sec, get_nodes_str()); + LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", settings.tracked_files_limit, settings.tracked_file_ttl_sec, get_nodes_str()); - size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? nodes_num - settings.s3queue_tracked_files_limit : 0; + size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? 
nodes_num - settings.tracked_files_limit : 0; for (const auto & node : sorted_nodes) { if (nodes_to_remove) @@ -453,7 +450,7 @@ void S3QueueMetadata::cleanupThreadFuncImpl() else if (check_nodes_ttl) { UInt64 node_age = getCurrentTime() - node.metadata.last_processed_timestamp; - if (node_age >= settings.s3queue_tracked_file_ttl_sec) + if (node_age >= settings.tracked_file_ttl_sec) { LOG_TRACE(log, "Removing node at path {} ({}) because file ttl is reached", node.metadata.file_path, node.zk_path); diff --git a/src/Storages/S3Queue/S3QueueMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h similarity index 62% rename from src/Storages/S3Queue/S3QueueMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h index ef4a9808c68..05060931b5a 100644 --- a/src/Storages/S3Queue/S3QueueMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadata.h @@ -7,23 +7,23 @@ #include #include #include -#include -#include -#include +#include +#include +#include namespace fs = std::filesystem; namespace Poco { class Logger; } namespace DB { -struct S3QueueSettings; -class StorageS3Queue; -struct S3QueueTableMetadata; +struct ObjectStorageQueueSettings; +class StorageObjectStorageQueue; +struct ObjectStorageQueueTableMetadata; struct StorageInMemoryMetadata; using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; /** - * A class for managing S3Queue metadata in zookeeper, e.g. + * A class for managing ObjectStorageQueue metadata in zookeeper, e.g. * the following folders: * - /processed * - /processing @@ -35,7 +35,7 @@ using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; * - /processing * - /failed * - * Depending on S3Queue processing mode (ordered or unordered) + * Depending on ObjectStorageQueue processing mode (ordered or unordered) * we can differently store metadata in /processed node. * * Implements caching of zookeeper metadata for faster responses. 
@@ -44,24 +44,24 @@ using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; * In case of Unordered mode - if files TTL is enabled or maximum tracked files limit is set * starts a background cleanup thread which is responsible for maintaining them. */ -class S3QueueMetadata +class ObjectStorageQueueMetadata { public: - using FileStatus = S3QueueIFileMetadata::FileStatus; - using FileMetadataPtr = std::shared_ptr; + using FileStatus = ObjectStorageQueueIFileMetadata::FileStatus; + using FileMetadataPtr = std::shared_ptr; using FileStatusPtr = std::shared_ptr; using FileStatuses = std::unordered_map; using Bucket = size_t; using Processor = std::string; - S3QueueMetadata(const fs::path & zookeeper_path_, const S3QueueSettings & settings_); - ~S3QueueMetadata(); + ObjectStorageQueueMetadata(const fs::path & zookeeper_path_, const ObjectStorageQueueSettings & settings_); + ~ObjectStorageQueueMetadata(); void initialize(const ConfigurationPtr & configuration, const StorageInMemoryMetadata & storage_metadata); - void checkSettings(const S3QueueSettings & settings) const; + void checkSettings(const ObjectStorageQueueSettings & settings) const; void shutdown(); - FileMetadataPtr getFileMetadata(const std::string & path, S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info = {}); + FileMetadataPtr getFileMetadata(const std::string & path, ObjectStorageQueueOrderedFileMetadata::BucketInfoPtr bucket_info = {}); FileStatusPtr getFileStatus(const std::string & path); FileStatuses getFileStatuses() const; @@ -69,20 +69,19 @@ public: /// Method of Ordered mode parallel processing. 
bool useBucketsForProcessing() const; Bucket getBucketForPath(const std::string & path) const; - S3QueueOrderedFileMetadata::BucketHolderPtr tryAcquireBucket(const Bucket & bucket, const Processor & processor); + ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr tryAcquireBucket(const Bucket & bucket, const Processor & processor); - static size_t getBucketsNum(const S3QueueSettings & settings); - static size_t getBucketsNum(const S3QueueTableMetadata & settings); + static size_t getBucketsNum(const ObjectStorageQueueSettings & settings); + static size_t getBucketsNum(const ObjectStorageQueueTableMetadata & settings); private: void cleanupThreadFunc(); void cleanupThreadFuncImpl(); - const S3QueueSettings settings; + const ObjectStorageQueueSettings settings; const fs::path zookeeper_path; const size_t buckets_num; - bool initialized = false; LoggerPtr log; std::atomic_bool shutdown_called = false; diff --git a/src/Storages/S3Queue/S3QueueMetadataFactory.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.cpp similarity index 62% rename from src/Storages/S3Queue/S3QueueMetadataFactory.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.cpp index a319b21ca3e..ffae33d6f41 100644 --- a/src/Storages/S3Queue/S3QueueMetadataFactory.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.cpp @@ -1,4 +1,4 @@ -#include +#include #include namespace DB @@ -8,20 +8,20 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -S3QueueMetadataFactory & S3QueueMetadataFactory::instance() +ObjectStorageQueueMetadataFactory & ObjectStorageQueueMetadataFactory::instance() { - static S3QueueMetadataFactory ret; + static ObjectStorageQueueMetadataFactory ret; return ret; } -S3QueueMetadataFactory::FilesMetadataPtr -S3QueueMetadataFactory::getOrCreate(const std::string & zookeeper_path, const S3QueueSettings & settings) +ObjectStorageQueueMetadataFactory::FilesMetadataPtr 
+ObjectStorageQueueMetadataFactory::getOrCreate(const std::string & zookeeper_path, const ObjectStorageQueueSettings & settings) { std::lock_guard lock(mutex); auto it = metadata_by_path.find(zookeeper_path); if (it == metadata_by_path.end()) { - auto files_metadata = std::make_shared(zookeeper_path, settings); + auto files_metadata = std::make_shared(zookeeper_path, settings); it = metadata_by_path.emplace(zookeeper_path, std::move(files_metadata)).first; } else @@ -32,7 +32,7 @@ S3QueueMetadataFactory::getOrCreate(const std::string & zookeeper_path, const S3 return it->second.metadata; } -void S3QueueMetadataFactory::remove(const std::string & zookeeper_path) +void ObjectStorageQueueMetadataFactory::remove(const std::string & zookeeper_path) { std::lock_guard lock(mutex); auto it = metadata_by_path.find(zookeeper_path); @@ -57,9 +57,9 @@ void S3QueueMetadataFactory::remove(const std::string & zookeeper_path) } } -std::unordered_map S3QueueMetadataFactory::getAll() +std::unordered_map ObjectStorageQueueMetadataFactory::getAll() { - std::unordered_map result; + std::unordered_map result; for (const auto & [zk_path, metadata_and_ref_count] : metadata_by_path) result.emplace(zk_path, metadata_and_ref_count.metadata); return result; diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.h new file mode 100644 index 00000000000..a93f5ee3d83 --- /dev/null +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueMetadataFactory.h @@ -0,0 +1,37 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class ObjectStorageQueueMetadataFactory final : private boost::noncopyable +{ +public: + using FilesMetadataPtr = std::shared_ptr; + + static ObjectStorageQueueMetadataFactory & instance(); + + FilesMetadataPtr getOrCreate(const std::string & zookeeper_path, const ObjectStorageQueueSettings & settings); + + void remove(const std::string & zookeeper_path); + + 
std::unordered_map getAll(); + +private: + struct Metadata + { + explicit Metadata(std::shared_ptr metadata_) : metadata(metadata_), ref_count(1) {} + + std::shared_ptr metadata; + /// TODO: the ref count should be kept in keeper, because of the case with distributed processing. + size_t ref_count = 0; + }; + using MetadataByPath = std::unordered_map; + + MetadataByPath metadata_by_path; + std::mutex mutex; +}; + +} diff --git a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.cpp similarity index 84% rename from src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.cpp index d1298b8c4fa..3b711a892c9 100644 --- a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -16,7 +16,7 @@ namespace ErrorCodes namespace { - S3QueueOrderedFileMetadata::Bucket getBucketForPathImpl(const std::string & path, size_t buckets_num) + ObjectStorageQueueOrderedFileMetadata::Bucket getBucketForPathImpl(const std::string & path, size_t buckets_num) { return sipHash64(path) % buckets_num; } @@ -40,28 +40,32 @@ namespace } } -S3QueueOrderedFileMetadata::BucketHolder::BucketHolder( +ObjectStorageQueueOrderedFileMetadata::BucketHolder::BucketHolder( const Bucket & bucket_, int bucket_version_, const std::string & bucket_lock_path_, const std::string & bucket_lock_id_path_, - zkutil::ZooKeeperPtr zk_client_) + zkutil::ZooKeeperPtr zk_client_, + LoggerPtr log_) : bucket_info(std::make_shared(BucketInfo{ .bucket = bucket_, .bucket_version = bucket_version_, .bucket_lock_path = bucket_lock_path_, .bucket_lock_id_path = bucket_lock_id_path_})) , zk_client(zk_client_) + , log(log_) { } -void S3QueueOrderedFileMetadata::BucketHolder::release() +void 
ObjectStorageQueueOrderedFileMetadata::BucketHolder::release() { if (released) return; released = true; - LOG_TEST(getLogger("S3QueueBucketHolder"), "Releasing bucket {}", bucket_info->bucket); + + LOG_TEST(log, "Releasing bucket {}, version {}", + bucket_info->bucket, bucket_info->bucket_version); Coordination::Requests requests; /// Check that bucket lock version has not changed @@ -72,11 +76,24 @@ void S3QueueOrderedFileMetadata::BucketHolder::release() Coordination::Responses responses; const auto code = zk_client->tryMulti(requests, responses); + + if (code == Coordination::Error::ZOK) + LOG_TEST(log, "Released bucket {}, version {}", + bucket_info->bucket, bucket_info->bucket_version); + else + LOG_TRACE(log, + "Failed to release bucket {}, version {}: {}. " + "This is normal if keeper session expired.", + bucket_info->bucket, bucket_info->bucket_version, code); + zkutil::KeeperMultiException::check(code, requests, responses); } -S3QueueOrderedFileMetadata::BucketHolder::~BucketHolder() +ObjectStorageQueueOrderedFileMetadata::BucketHolder::~BucketHolder() { + if (!released) + LOG_TEST(log, "Releasing bucket ({}) holder in destructor", bucket_info->bucket); + try { release(); @@ -87,7 +104,7 @@ S3QueueOrderedFileMetadata::BucketHolder::~BucketHolder() } } -S3QueueOrderedFileMetadata::S3QueueOrderedFileMetadata( +ObjectStorageQueueOrderedFileMetadata::ObjectStorageQueueOrderedFileMetadata( const std::filesystem::path & zk_path_, const std::string & path_, FileStatusPtr file_status_, @@ -95,7 +112,7 @@ S3QueueOrderedFileMetadata::S3QueueOrderedFileMetadata( size_t buckets_num_, size_t max_loading_retries_, LoggerPtr log_) - : S3QueueIFileMetadata( + : ObjectStorageQueueIFileMetadata( path_, /* processing_node_path */zk_path_ / "processing" / getNodeName(path_), /* processed_node_path */getProcessedPath(zk_path_, path_, buckets_num_), @@ -109,7 +126,7 @@ S3QueueOrderedFileMetadata::S3QueueOrderedFileMetadata( { } -std::vector 
S3QueueOrderedFileMetadata::getMetadataPaths(size_t buckets_num) +std::vector ObjectStorageQueueOrderedFileMetadata::getMetadataPaths(size_t buckets_num) { if (buckets_num > 1) { @@ -122,7 +139,7 @@ std::vector S3QueueOrderedFileMetadata::getMetadataPaths(size_t buc return {"failed", "processing"}; } -bool S3QueueOrderedFileMetadata::getMaxProcessedFile( +bool ObjectStorageQueueOrderedFileMetadata::getMaxProcessedFile( NodeMetadata & result, Coordination::Stat * stat, const zkutil::ZooKeeperPtr & zk_client) @@ -130,7 +147,7 @@ bool S3QueueOrderedFileMetadata::getMaxProcessedFile( return getMaxProcessedFile(result, stat, processed_node_path, zk_client); } -bool S3QueueOrderedFileMetadata::getMaxProcessedFile( +bool ObjectStorageQueueOrderedFileMetadata::getMaxProcessedFile( NodeMetadata & result, Coordination::Stat * stat, const std::string & processed_node_path_, @@ -146,15 +163,16 @@ bool S3QueueOrderedFileMetadata::getMaxProcessedFile( return false; } -S3QueueOrderedFileMetadata::Bucket S3QueueOrderedFileMetadata::getBucketForPath(const std::string & path_, size_t buckets_num) +ObjectStorageQueueOrderedFileMetadata::Bucket ObjectStorageQueueOrderedFileMetadata::getBucketForPath(const std::string & path_, size_t buckets_num) { return getBucketForPathImpl(path_, buckets_num); } -S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcquireBucket( +ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr ObjectStorageQueueOrderedFileMetadata::tryAcquireBucket( const std::filesystem::path & zk_path, const Bucket & bucket, - const Processor & processor) + const Processor & processor, + LoggerPtr log_) { const auto zk_client = getZooKeeper(); const auto bucket_lock_path = zk_path / "buckets" / toString(bucket) / "lock"; @@ -172,7 +190,7 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui bucket_lock_id_path, processor_info, zkutil::CreateMode::Persistent, /* ignore_if_exists */true)); /// Update bucket lock id path. 
We use its version as a version of ephemeral bucket lock node. - /// (See comment near S3QueueIFileMetadata::processing_node_version). + /// (See comment near ObjectStorageQueueIFileMetadata::processing_node_version). requests.push_back(zkutil::makeSetRequest(bucket_lock_id_path, processor_info, -1)); Coordination::Responses responses; @@ -183,7 +201,7 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui const auto bucket_lock_version = set_response->stat.version; LOG_TEST( - getLogger("S3QueueOrderedFileMetadata"), + log_, "Processor {} acquired bucket {} for processing (bucket lock version: {})", processor, bucket, bucket_lock_version); @@ -192,7 +210,8 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui bucket_lock_version, bucket_lock_path, bucket_lock_id_path, - zk_client); + zk_client, + log_); } if (code == Coordination::Error::ZNODEEXISTS) @@ -204,7 +223,7 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected error: {}", code); } -std::pair S3QueueOrderedFileMetadata::setProcessingImpl() +std::pair ObjectStorageQueueOrderedFileMetadata::setProcessingImpl() { /// In one zookeeper transaction do the following: enum RequestType @@ -300,7 +319,7 @@ std::pair S3QueueOrderedFileMetad } } -void S3QueueOrderedFileMetadata::setProcessedAtStartRequests( +void ObjectStorageQueueOrderedFileMetadata::setProcessedAtStartRequests( Coordination::Requests & requests, const zkutil::ZooKeeperPtr & zk_client) { @@ -318,7 +337,7 @@ void S3QueueOrderedFileMetadata::setProcessedAtStartRequests( } } -void S3QueueOrderedFileMetadata::setProcessedRequests( +void ObjectStorageQueueOrderedFileMetadata::setProcessedRequests( Coordination::Requests & requests, const zkutil::ZooKeeperPtr & zk_client, const std::string & processed_node_path_, @@ -359,7 +378,7 @@ void S3QueueOrderedFileMetadata::setProcessedRequests( } } -void 
S3QueueOrderedFileMetadata::setProcessedImpl() +void ObjectStorageQueueOrderedFileMetadata::setProcessedImpl() { /// In one zookeeper transaction do the following: enum RequestType @@ -371,7 +390,6 @@ void S3QueueOrderedFileMetadata::setProcessedImpl() }; const auto zk_client = getZooKeeper(); - const auto node_metadata_str = node_metadata.toString(); std::string failure_reason; while (true) @@ -385,8 +403,11 @@ void S3QueueOrderedFileMetadata::setProcessedImpl() auto code = zk_client->tryMulti(requests, responses); if (code == Coordination::Error::ZOK) { - if (max_loading_retries) - zk_client->tryRemove(failed_node_path + ".retriable", -1); + if (max_loading_retries + && zk_client->tryRemove(failed_node_path + ".retriable", -1) == Coordination::Error::ZOK) + { + LOG_TEST(log, "Removed node {}.retriable", failed_node_path); + } return; } diff --git a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.h similarity index 77% rename from src/Storages/S3Queue/S3QueueOrderedFileMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.h index 698ec0f54cc..9a997838f4d 100644 --- a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueOrderedFileMetadata.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include #include #include @@ -7,7 +7,7 @@ namespace DB { -class S3QueueOrderedFileMetadata : public S3QueueIFileMetadata +class ObjectStorageQueueOrderedFileMetadata : public ObjectStorageQueueIFileMetadata { public: using Processor = std::string; @@ -21,7 +21,7 @@ public: }; using BucketInfoPtr = std::shared_ptr; - explicit S3QueueOrderedFileMetadata( + explicit ObjectStorageQueueOrderedFileMetadata( const std::filesystem::path & zk_path_, const std::string & path_, FileStatusPtr file_status_, @@ -36,9 +36,10 @@ public: static BucketHolderPtr tryAcquireBucket( const std::filesystem::path & zk_path, const Bucket & 
bucket, - const Processor & processor); + const Processor & processor, + LoggerPtr log_); - static S3QueueOrderedFileMetadata::Bucket getBucketForPath(const std::string & path, size_t buckets_num); + static ObjectStorageQueueOrderedFileMetadata::Bucket getBucketForPath(const std::string & path, size_t buckets_num); static std::vector getMetadataPaths(size_t buckets_num); @@ -72,26 +73,32 @@ private: bool ignore_if_exists); }; -struct S3QueueOrderedFileMetadata::BucketHolder +struct ObjectStorageQueueOrderedFileMetadata::BucketHolder : private boost::noncopyable { BucketHolder( const Bucket & bucket_, int bucket_version_, const std::string & bucket_lock_path_, const std::string & bucket_lock_id_path_, - zkutil::ZooKeeperPtr zk_client_); + zkutil::ZooKeeperPtr zk_client_, + LoggerPtr log_); ~BucketHolder(); Bucket getBucket() const { return bucket_info->bucket; } BucketInfoPtr getBucketInfo() const { return bucket_info; } + void setFinished() { finished = true; } + bool isFinished() const { return finished; } + void release(); private: BucketInfoPtr bucket_info; const zkutil::ZooKeeperPtr zk_client; bool released = false; + bool finished = false; + LoggerPtr log; }; } diff --git a/src/Storages/S3Queue/S3QueueSettings.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp similarity index 52% rename from src/Storages/S3Queue/S3QueueSettings.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp index cb312adc5d9..67743db6197 100644 --- a/src/Storages/S3Queue/S3QueueSettings.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -13,14 +13,23 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } -IMPLEMENT_SETTINGS_TRAITS(S3QueueSettingsTraits, LIST_OF_S3QUEUE_SETTINGS) +IMPLEMENT_SETTINGS_TRAITS(ObjectStorageQueueSettingsTraits, LIST_OF_OBJECT_STORAGE_QUEUE_SETTINGS) -void S3QueueSettings::loadFromQuery(ASTStorage & storage_def) +void 
ObjectStorageQueueSettings::loadFromQuery(ASTStorage & storage_def) { if (storage_def.settings) { try { + /// We support settings starting with s3_ for compatibility. + for (auto & change : storage_def.settings->changes) + { + if (change.name.starts_with("s3queue_")) + change.name = change.name.substr(std::strlen("s3queue_")); + if (change.name == "enable_logging_to_s3queue_log") + change.name = "enable_logging_to_queue_log"; + } + applyChanges(storage_def.settings->changes); } catch (Exception & e) diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h new file mode 100644 index 00000000000..ea008c2334e --- /dev/null +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h @@ -0,0 +1,51 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +class ASTStorage; + + +#define OBJECT_STORAGE_QUEUE_RELATED_SETTINGS(M, ALIAS) \ + M(ObjectStorageQueueMode, \ + mode, \ + ObjectStorageQueueMode::ORDERED, \ + "With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKepeer." \ + "With ordered mode, only the max name of the successfully consumed file stored.", \ + 0) \ + M(ObjectStorageQueueAction, after_processing, ObjectStorageQueueAction::KEEP, "Delete or keep file in after successful processing", 0) \ + M(String, keeper_path, "", "Zookeeper node path", 0) \ + M(UInt32, loading_retries, 10, "Retry loading up to specified number of times", 0) \ + M(UInt32, processing_threads_num, 1, "Number of processing threads", 0) \ + M(UInt32, enable_logging_to_queue_log, 1, "Enable logging to system table system.(s3/azure_)queue_log", 0) \ + M(String, last_processed_path, "", "For Ordered mode. 
Files that have lexicographically smaller file name are considered already processed", 0) \ + M(UInt32, tracked_file_ttl_sec, 0, "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", 0) \ + M(UInt32, polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \ + M(UInt32, polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \ + M(UInt32, polling_backoff_ms, 1000, "Polling backoff", 0) \ + M(UInt32, tracked_files_limit, 1000, "For unordered mode. Max set size for tracking processed files in ZooKeeper", 0) \ + M(UInt32, cleanup_interval_min_ms, 60000, "For unordered mode. Polling backoff min for cleanup", 0) \ + M(UInt32, cleanup_interval_max_ms, 60000, "For unordered mode. Polling backoff max for cleanup", 0) \ + M(UInt32, buckets, 0, "Number of buckets for Ordered mode parallel processing", 0) \ + M(UInt32, max_processed_files_before_commit, 100, "Number of files which can be processed before being committed to keeper", 0) \ + M(UInt32, max_processed_rows_before_commit, 0, "Number of rows which can be processed before being committed to keeper", 0) \ + M(UInt32, max_processed_bytes_before_commit, 0, "Number of bytes which can be processed before being committed to keeper", 0) \ + M(UInt32, max_processing_time_sec_before_commit, 0, "Timeout in seconds after which to commit files committed to keeper", 0) \ + +#define LIST_OF_OBJECT_STORAGE_QUEUE_SETTINGS(M, ALIAS) \ + OBJECT_STORAGE_QUEUE_RELATED_SETTINGS(M, ALIAS) \ + LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) + +DECLARE_SETTINGS_TRAITS(ObjectStorageQueueSettingsTraits, LIST_OF_OBJECT_STORAGE_QUEUE_SETTINGS) + + +struct ObjectStorageQueueSettings : public BaseSettings +{ + void loadFromQuery(ASTStorage & storage_def); +}; + +} diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp new file mode 100644 index 00000000000..371a23f5a66 --- /dev/null +++ 
b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -0,0 +1,723 @@ +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace ProfileEvents +{ + extern const Event ObjectStorageQueuePullMicroseconds; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; +} + +ObjectStorageQueueSource::ObjectStorageQueueObjectInfo::ObjectStorageQueueObjectInfo( + const Source::ObjectInfo & object_info, + ObjectStorageQueueMetadata::FileMetadataPtr file_metadata_) + : Source::ObjectInfo(object_info.relative_path, object_info.metadata) + , file_metadata(file_metadata_) +{ +} + +ObjectStorageQueueSource::FileIterator::FileIterator( + std::shared_ptr metadata_, + std::unique_ptr glob_iterator_, + std::atomic & shutdown_called_, + LoggerPtr logger_) + : StorageObjectStorageSource::IIterator("ObjectStorageQueueIterator") + , metadata(metadata_) + , glob_iterator(std::move(glob_iterator_)) + , shutdown_called(shutdown_called_) + , log(logger_) +{ +} + +bool ObjectStorageQueueSource::FileIterator::isFinished() const +{ + LOG_TEST(log, "Iterator finished: {}, objects to retry: {}", iterator_finished, objects_to_retry.size()); + return iterator_finished + && std::all_of(listed_keys_cache.begin(), listed_keys_cache.end(), [](const auto & v) { return v.second.keys.empty(); }) + && objects_to_retry.empty(); +} + +size_t ObjectStorageQueueSource::FileIterator::estimatedKeysCount() +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method estimateKeysCount is not implemented"); +} + +ObjectStorageQueueSource::Source::ObjectInfoPtr ObjectStorageQueueSource::FileIterator::nextImpl(size_t processor) +{ + Source::ObjectInfoPtr object_info; + ObjectStorageQueueOrderedFileMetadata::BucketInfoPtr bucket_info; + + while (!shutdown_called) + { + if (metadata->useBucketsForProcessing()) + { + std::lock_guard lock(mutex); + std::tie(object_info, bucket_info) = 
getNextKeyFromAcquiredBucket(processor); + } + else + { + std::lock_guard lock(mutex); + if (objects_to_retry.empty()) + { + object_info = glob_iterator->next(processor); + if (!object_info) + iterator_finished = true; + } + else + { + object_info = objects_to_retry.front(); + objects_to_retry.pop_front(); + } + } + + if (!object_info) + { + LOG_TEST(log, "No object left"); + return {}; + } + + if (shutdown_called) + { + LOG_TEST(log, "Shutdown was called, stopping file iterator"); + return {}; + } + + auto file_metadata = metadata->getFileMetadata(object_info->relative_path, bucket_info); + if (file_metadata->setProcessing()) + return std::make_shared(*object_info, file_metadata); + } + return {}; +} + +void ObjectStorageQueueSource::FileIterator::returnForRetry(Source::ObjectInfoPtr object_info) +{ + chassert(object_info); + if (metadata->useBucketsForProcessing()) + { + const auto bucket = metadata->getBucketForPath(object_info->relative_path); + std::lock_guard lock(mutex); + listed_keys_cache[bucket].keys.emplace_front(object_info); + } + else + { + std::lock_guard lock(mutex); + objects_to_retry.push_back(object_info); + } +} + +void ObjectStorageQueueSource::FileIterator::releaseFinishedBuckets() +{ + for (const auto & [processor, holders] : bucket_holders) + { + LOG_TEST(log, "Releasing {} bucket holders for processor {}", holders.size(), processor); + + for (auto it = holders.begin(); it != holders.end(); ++it) + { + const auto & holder = *it; + const auto bucket = holder->getBucketInfo()->bucket; + if (!holder->isFinished()) + { + /// Only the last holder in the list of holders can be non-finished. + chassert(std::next(it) == holders.end()); + + /// Do not release non-finished bucket holder. We will continue processing it. + LOG_TEST(log, "Bucket {} is not finished yet, will not release it", bucket); + break; + } + + /// Release bucket lock. + holder->release(); + + /// Reset bucket processor in cached state. 
+ auto cached_info = listed_keys_cache.find(bucket); + if (cached_info != listed_keys_cache.end()) + cached_info->second.processor.reset(); + } + } +} + +std::pair +ObjectStorageQueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processor) +{ + auto bucket_holder_it = bucket_holders.emplace(processor, std::vector{}).first; + BucketHolder * current_bucket_holder = bucket_holder_it->second.empty() || bucket_holder_it->second.back()->isFinished() + ? nullptr + : bucket_holder_it->second.back().get(); + + auto current_processor = toString(processor); + + LOG_TEST( + log, "Current processor: {}, acquired bucket: {}", + processor, current_bucket_holder ? toString(current_bucket_holder->getBucket()) : "None"); + + while (true) + { + /// Each processing thread gets next path from glob_iterator->next() + /// and checks if corresponding bucket is already acquired by someone. + /// In case it is already acquired, they put the key into listed_keys_cache, + /// so that the thread who acquired the bucket will be able to see + /// those keys without the need to list s3 directory once again. + if (current_bucket_holder) + { + const auto bucket = current_bucket_holder->getBucket(); + auto it = listed_keys_cache.find(bucket); + if (it != listed_keys_cache.end()) + { + /// `bucket_keys` -- keys we iterated so far and which were not taken for processing. + /// `bucket_processor` -- processor id of the thread which has acquired the bucket. + auto & [bucket_keys, bucket_processor] = it->second; + + /// Check correctness just in case. + if (!bucket_processor.has_value()) + { + bucket_processor = current_processor; + } + else if (bucket_processor.value() != current_processor) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Expected current processor {} to be equal to {} for bucket {}", + current_processor, + bucket_processor.has_value() ? 
toString(bucket_processor.value()) : "None", + bucket); + } + + /// Take next key to process + if (!bucket_keys.empty()) + { + /// Take the key from the front, the order is important. + auto object_info = bucket_keys.front(); + bucket_keys.pop_front(); + + LOG_TEST(log, "Current bucket: {}, will process file: {}", + bucket, object_info->getFileName()); + + return std::pair{object_info, current_bucket_holder->getBucketInfo()}; + } + + LOG_TEST(log, "Cache of bucket {} is empty", bucket); + + /// No more keys in bucket, remove it from cache. + listed_keys_cache.erase(it); + } + else + { + LOG_TEST(log, "Cache of bucket {} is empty", bucket); + } + + if (iterator_finished) + { + /// Bucket is fully processed, but we will release it later + /// - once we write and commit files via commit() method. + current_bucket_holder->setFinished(); + } + } + /// If processing thread has already acquired some bucket + /// and while listing object storage directory gets a key which is in a different bucket, + /// it puts the key into listed_keys_cache to allow others to process it, + /// because one processing thread can acquire only one bucket at a time. + /// Once a thread is finished with its acquired bucket, it checks listed_keys_cache + /// to see if there are keys from buckets not acquired by anyone. + if (!current_bucket_holder) + { + LOG_TEST(log, "Checking caches keys: {}", listed_keys_cache.size()); + + for (auto it = listed_keys_cache.begin(); it != listed_keys_cache.end();) + { + auto & [bucket, bucket_info] = *it; + auto & [bucket_keys, bucket_processor] = bucket_info; + + LOG_TEST(log, "Bucket: {}, cached keys: {}, processor: {}", + bucket, bucket_keys.size(), bucket_processor.has_value() ? 
toString(bucket_processor.value()) : "None"); + + if (bucket_processor.has_value()) + { + LOG_TEST(log, "Bucket {} is already locked for processing by {} (keys: {})", + bucket, bucket_processor.value(), bucket_keys.size()); + ++it; + continue; + } + + if (bucket_keys.empty()) + { + /// No more keys in bucket, remove it from cache. + /// We still might add new keys to this bucket if !iterator_finished. + it = listed_keys_cache.erase(it); + continue; + } + + auto acquired_bucket = metadata->tryAcquireBucket(bucket, current_processor); + if (!acquired_bucket) + { + LOG_TEST(log, "Bucket {} is already locked for processing (keys: {})", + bucket, bucket_keys.size()); + ++it; + continue; + } + + bucket_holder_it->second.push_back(acquired_bucket); + current_bucket_holder = bucket_holder_it->second.back().get(); + + bucket_processor = current_processor; + + /// Take the key from the front, the order is important. + auto object_info = bucket_keys.front(); + bucket_keys.pop_front(); + + LOG_TEST(log, "Acquired bucket: {}, will process file: {}", + bucket, object_info->getFileName()); + + return std::pair{object_info, current_bucket_holder->getBucketInfo()}; + } + } + + if (iterator_finished) + { + LOG_TEST(log, "Reached the end of file iterator and nothing left in keys cache"); + return {}; + } + + auto object_info = glob_iterator->next(processor); + if (object_info) + { + const auto bucket = metadata->getBucketForPath(object_info->relative_path); + auto & bucket_cache = listed_keys_cache[bucket]; + + LOG_TEST(log, "Found next file: {}, bucket: {}, current bucket: {}, cached_keys: {}", + object_info->getFileName(), bucket, + current_bucket_holder ? toString(current_bucket_holder->getBucket()) : "None", + bucket_cache.keys.size()); + + if (current_bucket_holder) + { + if (current_bucket_holder->getBucket() != bucket) + { + /// Acquired bucket differs from object's bucket, + /// put it into bucket's cache and continue. 
+ bucket_cache.keys.emplace_back(object_info); + continue; + } + /// Bucket is already acquired, process the file. + return std::pair{object_info, current_bucket_holder->getBucketInfo()}; + } + else + { + auto acquired_bucket = metadata->tryAcquireBucket(bucket, current_processor); + if (acquired_bucket) + { + bucket_holder_it->second.push_back(acquired_bucket); + current_bucket_holder = bucket_holder_it->second.back().get(); + + bucket_cache.processor = current_processor; + if (!bucket_cache.keys.empty()) + { + /// We have to maintain ordering between keys, + /// so if some keys are already in cache - start with them. + bucket_cache.keys.emplace_back(object_info); + object_info = bucket_cache.keys.front(); + bucket_cache.keys.pop_front(); + } + return std::pair{object_info, current_bucket_holder->getBucketInfo()}; + } + else + { + LOG_TEST(log, "Bucket {} is already locked for processing", bucket); + bucket_cache.keys.emplace_back(object_info); + continue; + } + } + } + else + { + LOG_TEST(log, "Reached the end of file iterator"); + iterator_finished = true; + + if (listed_keys_cache.empty()) + return {}; + else + continue; + } + } +} + +ObjectStorageQueueSource::ObjectStorageQueueSource( + String name_, + size_t processor_id_, + const Block & header_, + std::unique_ptr internal_source_, + std::shared_ptr files_metadata_, + const ObjectStorageQueueAction & action_, + RemoveFileFunc remove_file_func_, + const NamesAndTypesList & requested_virtual_columns_, + ContextPtr context_, + const std::atomic & shutdown_called_, + const std::atomic & table_is_being_dropped_, + std::shared_ptr system_queue_log_, + const StorageID & storage_id_, + LoggerPtr log_, + size_t max_processed_files_before_commit_, + size_t max_processed_rows_before_commit_, + size_t max_processed_bytes_before_commit_, + size_t max_processing_time_sec_before_commit_, + bool commit_once_processed_) + : ISource(header_) + , WithContext(context_) + , name(std::move(name_)) + , processor_id(processor_id_) 
+ , action(action_) + , files_metadata(files_metadata_) + , internal_source(std::move(internal_source_)) + , requested_virtual_columns(requested_virtual_columns_) + , shutdown_called(shutdown_called_) + , table_is_being_dropped(table_is_being_dropped_) + , system_queue_log(system_queue_log_) + , storage_id(storage_id_) + , max_processed_files_before_commit(max_processed_files_before_commit_) + , max_processed_rows_before_commit(max_processed_rows_before_commit_) + , max_processed_bytes_before_commit(max_processed_bytes_before_commit_) + , max_processing_time_sec_before_commit(max_processing_time_sec_before_commit_) + , commit_once_processed(commit_once_processed_) + , remove_file_func(remove_file_func_) + , log(log_) +{ +} + +String ObjectStorageQueueSource::getName() const +{ + return name; +} + +void ObjectStorageQueueSource::lazyInitialize(size_t processor) +{ + if (initialized) + return; + + LOG_TEST(log, "Initializing a new reader"); + + internal_source->lazyInitialize(processor); + reader = std::move(internal_source->reader); + if (reader) + reader_future = std::move(internal_source->reader_future); + + initialized = true; +} + +Chunk ObjectStorageQueueSource::generate() +{ + Chunk chunk; + try + { + chunk = generateImpl(); + } + catch (...) 
+ { + if (commit_once_processed) + commit(false, getCurrentExceptionMessage(true)); + + throw; + } + + if (!chunk && commit_once_processed) + { + commit(true); + } + return chunk; +} + +Chunk ObjectStorageQueueSource::generateImpl() +{ + lazyInitialize(processor_id); + + while (true) + { + if (!reader) + { + LOG_TEST(log, "No reader"); + break; + } + + const auto * object_info = dynamic_cast(reader.getObjectInfo().get()); + auto file_metadata = object_info->file_metadata; + auto file_status = file_metadata->getFileStatus(); + + if (isCancelled()) + { + reader->cancel(); + + if (processed_rows_from_file) + { + try + { + file_metadata->setFailed("Cancelled", /* reduce_retry_count */true, /* overwrite_status */false); + } + catch (...) + { + LOG_ERROR(log, "Failed to set file {} as failed: {}", + object_info->relative_path, getCurrentExceptionMessage(true)); + } + } + + LOG_TEST(log, "Query is cancelled"); + break; + } + + const auto & path = reader.getObjectInfo()->getPath(); + + if (shutdown_called) + { + LOG_TEST(log, "Shutdown called"); + + if (processed_rows_from_file == 0) + break; + + if (table_is_being_dropped) + { + LOG_DEBUG( + log, "Table is being dropped, {} rows are already processed from {}, but file is not fully processed", + processed_rows_from_file, path); + + try + { + file_metadata->setFailed("Table is dropped", /* reduce_retry_count */true, /* overwrite_status */false); + } + catch (...) + { + LOG_ERROR(log, "Failed to set file {} as failed: {}", + object_info->relative_path, getCurrentExceptionMessage(true)); + } + + /// Leave the file half processed. Table is being dropped, so we do not care. + break; + } + + LOG_DEBUG(log, "Shutdown called, but file {} is partially processed ({} rows). 
" + "Will process the file fully and then shutdown", + path, processed_rows_from_file); + } + + try + { + auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::ObjectStorageQueuePullMicroseconds); + + Chunk chunk; + if (reader->pull(chunk)) + { + LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), path); + + file_status->processed_rows += chunk.getNumRows(); + processed_rows_from_file += chunk.getNumRows(); + total_processed_rows += chunk.getNumRows(); + total_processed_bytes += chunk.bytes(); + + VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( + chunk, requested_virtual_columns, + { + .path = path, + .size = reader.getObjectInfo()->metadata->size_bytes + }); + + return chunk; + } + } + catch (...) + { + const auto message = getCurrentExceptionMessage(true); + LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", path, message); + + failed_during_read_files.push_back(file_metadata); + file_status->onFailed(getCurrentExceptionMessage(true)); + + if (processed_rows_from_file == 0) + { + auto * file_iterator = dynamic_cast(internal_source->file_iterator.get()); + chassert(file_iterator); + + if (file_status->retries < file_metadata->getMaxTries()) + file_iterator->returnForRetry(reader.getObjectInfo()); + + /// If we did not process any rows from the failed file, + /// commit all previously processed files, + /// not to lose the work already done. 
+ return {}; + } + + throw; + } + + file_status->setProcessingEndTime(); + file_status.reset(); + + processed_rows_from_file = 0; + processed_files.push_back(file_metadata); + + if (processed_files.size() == max_processed_files_before_commit) + { + LOG_TRACE(log, "Number of max processed files before commit reached " + "(rows: {}, bytes: {}, files: {})", + total_processed_rows, total_processed_bytes, processed_files.size()); + break; + } + + bool rows_or_bytes_or_time_limit_reached = false; + if (max_processed_rows_before_commit + && total_processed_rows == max_processed_rows_before_commit) + { + LOG_TRACE(log, "Number of max processed rows before commit reached " + "(rows: {}, bytes: {}, files: {})", + total_processed_rows, total_processed_bytes, processed_files.size()); + + rows_or_bytes_or_time_limit_reached = true; + } + else if (max_processed_bytes_before_commit + && total_processed_bytes == max_processed_bytes_before_commit) + { + LOG_TRACE(log, "Number of max processed bytes before commit reached " + "(rows: {}, bytes: {}, files: {})", + total_processed_rows, total_processed_bytes, processed_files.size()); + + rows_or_bytes_or_time_limit_reached = true; + } + else if (max_processing_time_sec_before_commit + && total_stopwatch.elapsedSeconds() >= max_processing_time_sec_before_commit) + { + LOG_TRACE(log, "Max processing time before commit reached " + "(rows: {}, bytes: {}, files: {})", + total_processed_rows, total_processed_bytes, processed_files.size()); + + rows_or_bytes_or_time_limit_reached = true; + } + + if (rows_or_bytes_or_time_limit_reached) + { + if (!reader_future.valid()) + break; + + LOG_TRACE(log, "Rows or bytes limit reached, but we have one more file scheduled already, " + "will process it despite the limit"); + } + + if (shutdown_called) + { + LOG_TRACE(log, "Shutdown was called, stopping sync"); + break; + } + + chassert(reader_future.valid()); + reader = reader_future.get(); + + if (!reader) + { + LOG_TEST(log, "Reader finished"); + 
break; + } + + file_status = files_metadata->getFileStatus(reader.getObjectInfo()->getPath()); + + if (!rows_or_bytes_or_time_limit_reached && processed_files.size() + 1 < max_processed_files_before_commit) + { + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. + internal_source->create_reader_pool->wait(); + reader_future = internal_source->createReaderAsync(processor_id); + } + } + + return {}; +} + +void ObjectStorageQueueSource::commit(bool success, const std::string & exception_message) +{ + LOG_TEST(log, "Having {} files to set as {}, failed files: {}", + processed_files.size(), success ? "Processed" : "Failed", failed_during_read_files.size()); + + for (const auto & file_metadata : processed_files) + { + if (success) + { + file_metadata->setProcessed(); + applyActionAfterProcessing(file_metadata->getPath()); + } + else + { + file_metadata->setFailed( + exception_message, + /* reduce_retry_count */false, + /* overwrite_status */true); + + } + appendLogElement(file_metadata->getPath(), *file_metadata->getFileStatus(), processed_rows_from_file, /* processed */success); + } + + for (const auto & file_metadata : failed_during_read_files) + { + /// `exception` from commit args is from insertion to storage. + /// Here we do not used it as failed_during_read_files were not inserted into storage, but skipped. 
+ file_metadata->setFailed( + file_metadata->getFileStatus()->getException(), + /* reduce_retry_count */true, + /* overwrite_status */false); + + appendLogElement(file_metadata->getPath(), *file_metadata->getFileStatus(), processed_rows_from_file, /* processed */false); + } +} + +void ObjectStorageQueueSource::applyActionAfterProcessing(const String & path) +{ + switch (action) + { + case ObjectStorageQueueAction::DELETE: + { + assert(remove_file_func); + remove_file_func(path); + break; + } + case ObjectStorageQueueAction::KEEP: + break; + } +} + +void ObjectStorageQueueSource::appendLogElement( + const std::string & filename, + ObjectStorageQueueMetadata::FileStatus & file_status_, + size_t processed_rows, + bool processed) +{ + if (!system_queue_log) + return; + + ObjectStorageQueueLogElement elem{}; + { + elem = ObjectStorageQueueLogElement + { + .event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), + .database = storage_id.database_name, + .table = storage_id.table_name, + .uuid = toString(storage_id.uuid), + .file_name = filename, + .rows_processed = processed_rows, + .status = processed ? 
ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Processed : ObjectStorageQueueLogElement::ObjectStorageQueueStatus::Failed, + .processing_start_time = file_status_.processing_start_time, + .processing_end_time = file_status_.processing_end_time, + .exception = file_status_.getException(), + }; + } + system_queue_log->add(std::move(elem)); +} + +} diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h new file mode 100644 index 00000000000..ccd87e8a269 --- /dev/null +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -0,0 +1,172 @@ +#pragma once +#include "config.h" + +#include +#include +#include +#include +#include +#include + + +namespace Poco { class Logger; } + +namespace DB +{ + +struct ObjectMetadata; + +class ObjectStorageQueueSource : public ISource, WithContext +{ +public: + using Storage = StorageObjectStorage; + using Source = StorageObjectStorageSource; + using RemoveFileFunc = std::function; + using BucketHolderPtr = ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr; + using BucketHolder = ObjectStorageQueueOrderedFileMetadata::BucketHolder; + + struct ObjectStorageQueueObjectInfo : public Source::ObjectInfo + { + ObjectStorageQueueObjectInfo( + const Source::ObjectInfo & object_info, + ObjectStorageQueueMetadata::FileMetadataPtr file_metadata_); + + ObjectStorageQueueMetadata::FileMetadataPtr file_metadata; + }; + + class FileIterator : public StorageObjectStorageSource::IIterator + { + public: + FileIterator( + std::shared_ptr metadata_, + std::unique_ptr glob_iterator_, + std::atomic & shutdown_called_, + LoggerPtr logger_); + + bool isFinished() const; + + /// Note: + /// List results in s3 are always returned in UTF-8 binary order. 
+ /// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) + Source::ObjectInfoPtr nextImpl(size_t processor) override; + + size_t estimatedKeysCount() override; + + /// If the key was taken from iterator via next() call, + /// we might later want to return it back for retrying. + void returnForRetry(Source::ObjectInfoPtr object_info); + + /// Release hold buckets. + /// In fact, they could be released in destructors of BucketHolder, + /// but we anyway try to release them explicitly, + /// because we want to be able to rethrow exceptions if they might happen. + void releaseFinishedBuckets(); + + private: + using Bucket = ObjectStorageQueueMetadata::Bucket; + using Processor = ObjectStorageQueueMetadata::Processor; + + const std::shared_ptr metadata; + const std::unique_ptr glob_iterator; + + std::atomic & shutdown_called; + std::mutex mutex; + LoggerPtr log; + + struct ListedKeys + { + std::deque keys; + std::optional processor; + }; + /// A cache of keys which were iterated via glob_iterator, but not taken for processing. + std::unordered_map listed_keys_cache; + + /// We store a vector of holders, because we cannot release them until processed files are committed. + std::unordered_map> bucket_holders; + + /// Is glob_iterator finished? + std::atomic_bool iterator_finished = false; + + /// Only for processing without buckets. 
+ std::deque objects_to_retry; + + std::pair getNextKeyFromAcquiredBucket(size_t processor); + bool hasKeysForProcessor(const Processor & processor) const; + }; + + ObjectStorageQueueSource( + String name_, + size_t processor_id_, + const Block & header_, + std::unique_ptr internal_source_, + std::shared_ptr files_metadata_, + const ObjectStorageQueueAction & action_, + RemoveFileFunc remove_file_func_, + const NamesAndTypesList & requested_virtual_columns_, + ContextPtr context_, + const std::atomic & shutdown_called_, + const std::atomic & table_is_being_dropped_, + std::shared_ptr system_queue_log_, + const StorageID & storage_id_, + LoggerPtr log_, + size_t max_processed_files_before_commit_, + size_t max_processed_rows_before_commit_, + size_t max_processed_bytes_before_commit_, + size_t max_processing_time_sec_before_commit_, + bool commit_once_processed_); + + static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); + + String getName() const override; + + Chunk generate() override; + + /// Commit files after insertion into storage finished. + /// `success` defines whether insertion was successful or not. 
+ void commit(bool success, const std::string & exception_message = {}); + +private: + const String name; + const size_t processor_id; + const ObjectStorageQueueAction action; + const std::shared_ptr files_metadata; + const std::shared_ptr internal_source; + const NamesAndTypesList requested_virtual_columns; + const std::atomic & shutdown_called; + const std::atomic & table_is_being_dropped; + const std::shared_ptr system_queue_log; + const StorageID storage_id; + const size_t max_processed_files_before_commit; + const size_t max_processed_rows_before_commit; + const size_t max_processed_bytes_before_commit; + const size_t max_processing_time_sec_before_commit; + const bool commit_once_processed; + + RemoveFileFunc remove_file_func; + LoggerPtr log; + + std::vector processed_files; + std::vector failed_during_read_files; + + Source::ReaderHolder reader; + std::future reader_future; + std::atomic initialized{false}; + + size_t processed_rows_from_file = 0; + size_t total_processed_rows = 0; + size_t total_processed_bytes = 0; + + Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; + + Chunk generateImpl(); + void applyActionAfterProcessing(const String & path); + void appendLogElement( + const std::string & filename, + ObjectStorageQueueMetadata::FileStatus & file_status_, + size_t processed_rows, + bool processed); + + void lazyInitialize(size_t processor); +}; + +} diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.cpp similarity index 72% rename from src/Storages/S3Queue/S3QueueTableMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.cpp index ecaa7ad57cc..cb9cdf8e186 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.cpp @@ -3,9 +3,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include @@ -20,33 +20,33 @@ namespace ErrorCodes 
namespace { - S3QueueMode modeFromString(const std::string & mode) + ObjectStorageQueueMode modeFromString(const std::string & mode) { if (mode == "ordered") - return S3QueueMode::ORDERED; + return ObjectStorageQueueMode::ORDERED; if (mode == "unordered") - return S3QueueMode::UNORDERED; - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected S3Queue mode: {}", mode); + return ObjectStorageQueueMode::UNORDERED; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected ObjectStorageQueue mode: {}", mode); } } -S3QueueTableMetadata::S3QueueTableMetadata( +ObjectStorageQueueTableMetadata::ObjectStorageQueueTableMetadata( const StorageObjectStorage::Configuration & configuration, - const S3QueueSettings & engine_settings, + const ObjectStorageQueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata) { format_name = configuration.format; after_processing = engine_settings.after_processing.toString(); mode = engine_settings.mode.toString(); - tracked_files_limit = engine_settings.s3queue_tracked_files_limit; - tracked_file_ttl_sec = engine_settings.s3queue_tracked_file_ttl_sec; - buckets = engine_settings.s3queue_buckets; - processing_threads_num = engine_settings.s3queue_processing_threads_num; + tracked_files_limit = engine_settings.tracked_files_limit; + tracked_file_ttl_sec = engine_settings.tracked_file_ttl_sec; + buckets = engine_settings.buckets; + processing_threads_num = engine_settings.processing_threads_num; columns = storage_metadata.getColumns().toString(); } -String S3QueueTableMetadata::toString() const +String ObjectStorageQueueTableMetadata::toString() const { Poco::JSON::Object json; json.set("after_processing", after_processing); @@ -65,7 +65,7 @@ String S3QueueTableMetadata::toString() const return oss.str(); } -void S3QueueTableMetadata::read(const String & metadata_str) +void ObjectStorageQueueTableMetadata::read(const String & metadata_str) { Poco::JSON::Parser parser; auto json = parser.parse(metadata_str).extract(); @@ 
-102,19 +102,19 @@ void S3QueueTableMetadata::read(const String & metadata_str) buckets = json->getValue("buckets"); } -S3QueueTableMetadata S3QueueTableMetadata::parse(const String & metadata_str) +ObjectStorageQueueTableMetadata ObjectStorageQueueTableMetadata::parse(const String & metadata_str) { - S3QueueTableMetadata metadata; + ObjectStorageQueueTableMetadata metadata; metadata.read(metadata_str); return metadata; } -void S3QueueTableMetadata::checkEquals(const S3QueueTableMetadata & from_zk) const +void ObjectStorageQueueTableMetadata::checkEquals(const ObjectStorageQueueTableMetadata & from_zk) const { checkImmutableFieldsEquals(from_zk); } -void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const +void ObjectStorageQueueTableMetadata::checkImmutableFieldsEquals(const ObjectStorageQueueTableMetadata & from_zk) const { if (after_processing != from_zk.after_processing) throw Exception( @@ -164,29 +164,29 @@ void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata from_zk.last_processed_path, last_processed_path); - if (modeFromString(mode) == S3QueueMode::ORDERED) + if (modeFromString(mode) == ObjectStorageQueueMode::ORDERED) { if (buckets != from_zk.buckets) { throw Exception( ErrorCodes::METADATA_MISMATCH, - "Existing table metadata in ZooKeeper differs in s3queue_buckets setting. " + "Existing table metadata in ZooKeeper differs in buckets setting. " "Stored in ZooKeeper: {}, local: {}", from_zk.buckets, buckets); } - if (S3QueueMetadata::getBucketsNum(*this) != S3QueueMetadata::getBucketsNum(from_zk)) + if (ObjectStorageQueueMetadata::getBucketsNum(*this) != ObjectStorageQueueMetadata::getBucketsNum(from_zk)) { throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in processing buckets. 
" "Stored in ZooKeeper: {}, local: {}", - S3QueueMetadata::getBucketsNum(*this), S3QueueMetadata::getBucketsNum(from_zk)); + ObjectStorageQueueMetadata::getBucketsNum(*this), ObjectStorageQueueMetadata::getBucketsNum(from_zk)); } } } -void S3QueueTableMetadata::checkEquals(const S3QueueSettings & current, const S3QueueSettings & expected) +void ObjectStorageQueueTableMetadata::checkEquals(const ObjectStorageQueueSettings & current, const ObjectStorageQueueSettings & expected) { if (current.after_processing != expected.after_processing) throw Exception( @@ -204,48 +204,48 @@ void S3QueueTableMetadata::checkEquals(const S3QueueSettings & current, const S3 expected.mode.toString(), current.mode.toString()); - if (current.s3queue_tracked_files_limit != expected.s3queue_tracked_files_limit) + if (current.tracked_files_limit != expected.tracked_files_limit) throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in max set size. " "Stored in ZooKeeper: {}, local: {}", - expected.s3queue_tracked_files_limit, - current.s3queue_tracked_files_limit); + expected.tracked_files_limit, + current.tracked_files_limit); - if (current.s3queue_tracked_file_ttl_sec != expected.s3queue_tracked_file_ttl_sec) + if (current.tracked_file_ttl_sec != expected.tracked_file_ttl_sec) throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in max set age. " "Stored in ZooKeeper: {}, local: {}", - expected.s3queue_tracked_file_ttl_sec, - current.s3queue_tracked_file_ttl_sec); + expected.tracked_file_ttl_sec, + current.tracked_file_ttl_sec); - if (current.s3queue_last_processed_path.value != expected.s3queue_last_processed_path.value) + if (current.last_processed_path.value != expected.last_processed_path.value) throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in last_processed_path. 
" "Stored in ZooKeeper: {}, local: {}", - expected.s3queue_last_processed_path.value, - current.s3queue_last_processed_path.value); + expected.last_processed_path.value, + current.last_processed_path.value); - if (current.mode == S3QueueMode::ORDERED) + if (current.mode == ObjectStorageQueueMode::ORDERED) { - if (current.s3queue_buckets != expected.s3queue_buckets) + if (current.buckets != expected.buckets) { throw Exception( ErrorCodes::METADATA_MISMATCH, - "Existing table metadata in ZooKeeper differs in s3queue_buckets setting. " + "Existing table metadata in ZooKeeper differs in buckets setting. " "Stored in ZooKeeper: {}, local: {}", - expected.s3queue_buckets, current.s3queue_buckets); + expected.buckets, current.buckets); } - if (S3QueueMetadata::getBucketsNum(current) != S3QueueMetadata::getBucketsNum(expected)) + if (ObjectStorageQueueMetadata::getBucketsNum(current) != ObjectStorageQueueMetadata::getBucketsNum(expected)) { throw Exception( ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in processing buckets. " "Stored in ZooKeeper: {}, local: {}", - S3QueueMetadata::getBucketsNum(current), S3QueueMetadata::getBucketsNum(expected)); + ObjectStorageQueueMetadata::getBucketsNum(current), ObjectStorageQueueMetadata::getBucketsNum(expected)); } } } diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h similarity index 50% rename from src/Storages/S3Queue/S3QueueTableMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h index d53b60570ae..bbae06b66c6 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -11,10 +11,10 @@ namespace DB class WriteBuffer; class ReadBuffer; -/** The basic parameters of S3Queue table engine for saving in ZooKeeper. 
+/** The basic parameters of ObjectStorageQueue table engine for saving in ZooKeeper. * Lets you verify that they match local ones. */ -struct S3QueueTableMetadata +struct ObjectStorageQueueTableMetadata { String format_name; String columns; @@ -26,22 +26,22 @@ struct S3QueueTableMetadata UInt64 processing_threads_num = 1; String last_processed_path; - S3QueueTableMetadata() = default; - S3QueueTableMetadata( + ObjectStorageQueueTableMetadata() = default; + ObjectStorageQueueTableMetadata( const StorageObjectStorage::Configuration & configuration, - const S3QueueSettings & engine_settings, + const ObjectStorageQueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata); void read(const String & metadata_str); - static S3QueueTableMetadata parse(const String & metadata_str); + static ObjectStorageQueueTableMetadata parse(const String & metadata_str); String toString() const; - void checkEquals(const S3QueueTableMetadata & from_zk) const; - static void checkEquals(const S3QueueSettings & current, const S3QueueSettings & expected); + void checkEquals(const ObjectStorageQueueTableMetadata & from_zk) const; + static void checkEquals(const ObjectStorageQueueSettings & current, const ObjectStorageQueueSettings & expected); private: - void checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const; + void checkImmutableFieldsEquals(const ObjectStorageQueueTableMetadata & from_zk) const; }; diff --git a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.cpp similarity index 88% rename from src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.cpp index c61e9557fc2..40751d9c332 100644 --- a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include 
#include @@ -18,13 +18,13 @@ namespace } } -S3QueueUnorderedFileMetadata::S3QueueUnorderedFileMetadata( +ObjectStorageQueueUnorderedFileMetadata::ObjectStorageQueueUnorderedFileMetadata( const std::filesystem::path & zk_path, const std::string & path_, FileStatusPtr file_status_, size_t max_loading_retries_, LoggerPtr log_) - : S3QueueIFileMetadata( + : ObjectStorageQueueIFileMetadata( path_, /* processing_node_path */zk_path / "processing" / getNodeName(path_), /* processed_node_path */zk_path / "processed" / getNodeName(path_), @@ -35,7 +35,7 @@ S3QueueUnorderedFileMetadata::S3QueueUnorderedFileMetadata( { } -std::pair S3QueueUnorderedFileMetadata::setProcessingImpl() +std::pair ObjectStorageQueueUnorderedFileMetadata::setProcessingImpl() { /// In one zookeeper transaction do the following: enum RequestType @@ -89,7 +89,7 @@ std::pair S3QueueUnorderedFileMet throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected state of zookeeper transaction: {}", magic_enum::enum_name(code)); } -void S3QueueUnorderedFileMetadata::setProcessedAtStartRequests( +void ObjectStorageQueueUnorderedFileMetadata::setProcessedAtStartRequests( Coordination::Requests & requests, const zkutil::ZooKeeperPtr &) { @@ -98,7 +98,7 @@ void S3QueueUnorderedFileMetadata::setProcessedAtStartRequests( processed_node_path, node_metadata.toString(), zkutil::CreateMode::Persistent)); } -void S3QueueUnorderedFileMetadata::setProcessedImpl() +void ObjectStorageQueueUnorderedFileMetadata::setProcessedImpl() { /// In one zookeeper transaction do the following: enum RequestType @@ -130,8 +130,11 @@ void S3QueueUnorderedFileMetadata::setProcessedImpl() const auto code = zk_client->tryMulti(requests, responses); if (code == Coordination::Error::ZOK) { - if (max_loading_retries) - zk_client->tryRemove(failed_node_path + ".retriable", -1); + if (max_loading_retries + && zk_client->tryRemove(failed_node_path + ".retriable", -1) == Coordination::Error::ZOK) + { + LOG_TEST(log, "Removed node {}.retriable", 
failed_node_path); + } LOG_TRACE(log, "Moved file `{}` to processed (node path: {})", path, processed_node_path); return; diff --git a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.h similarity index 75% rename from src/Storages/S3Queue/S3QueueUnorderedFileMetadata.h rename to src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.h index 24c2765bf3a..cc5d8a09ec9 100644 --- a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueUnorderedFileMetadata.h @@ -1,17 +1,17 @@ #pragma once -#include +#include #include #include namespace DB { -class S3QueueUnorderedFileMetadata : public S3QueueIFileMetadata +class ObjectStorageQueueUnorderedFileMetadata : public ObjectStorageQueueIFileMetadata { public: using Bucket = size_t; - explicit S3QueueUnorderedFileMetadata( + explicit ObjectStorageQueueUnorderedFileMetadata( const std::filesystem::path & zk_path, const std::string & path_, FileStatusPtr file_status_, diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp similarity index 54% rename from src/Storages/S3Queue/StorageS3Queue.cpp rename to src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 0844d0a479e..95265cde9ea 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -1,10 +1,7 @@ #include -#include "config.h" #include -#include #include -#include #include #include #include @@ -15,29 +12,23 @@ #include #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include #include #include -#include #include +#include #include #include namespace fs = std::filesystem; -namespace ProfileEvents -{ - extern const Event S3DeleteObjects; - extern const Event S3ListObjects; -} - namespace DB { @@ -45,23 +36,22 @@ 
namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; - extern const int S3_ERROR; extern const int QUERY_NOT_ALLOWED; } namespace { - std::string chooseZooKeeperPath(const StorageID & table_id, const Settings & settings, const S3QueueSettings & s3queue_settings) + std::string chooseZooKeeperPath(const StorageID & table_id, const Settings & settings, const ObjectStorageQueueSettings & queue_settings) { std::string zk_path_prefix = settings.s3queue_default_zookeeper_path.value; if (zk_path_prefix.empty()) zk_path_prefix = "/"; std::string result_zk_path; - if (s3queue_settings.keeper_path.changed) + if (queue_settings.keeper_path.changed) { /// We do not add table uuid here on purpose. - result_zk_path = fs::path(zk_path_prefix) / s3queue_settings.keeper_path.value; + result_zk_path = fs::path(zk_path_prefix) / queue_settings.keeper_path.value; } else { @@ -71,29 +61,67 @@ namespace return zkutil::extractZooKeeperPath(result_zk_path, true); } - void checkAndAdjustSettings(S3QueueSettings & s3queue_settings, const Settings & settings) + void checkAndAdjustSettings( + ObjectStorageQueueSettings & queue_settings, + ASTStorage * engine_args, + bool is_attach, + const LoggerPtr & log) { - if (!s3queue_settings.s3queue_processing_threads_num) + if (!is_attach && !queue_settings.mode.changed) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `s3queue_processing_threads_num` cannot be set to zero"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `mode` (Unordered/Ordered) is not specified, but is required."); + } + /// In case !is_attach, we leave Ordered mode as default for compatibility. 
+ + if (!queue_settings.processing_threads_num) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `processing_threads_num` cannot be set to zero"); } - if (!s3queue_settings.s3queue_enable_logging_to_s3queue_log.changed) - { - s3queue_settings.s3queue_enable_logging_to_s3queue_log = settings.s3queue_enable_logging_to_s3queue_log; - } - - if (s3queue_settings.s3queue_cleanup_interval_min_ms > s3queue_settings.s3queue_cleanup_interval_max_ms) + if (queue_settings.cleanup_interval_min_ms > queue_settings.cleanup_interval_max_ms) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Setting `s3queue_cleanup_interval_min_ms` ({}) must be less or equal to `s3queue_cleanup_interval_max_ms` ({})", - s3queue_settings.s3queue_cleanup_interval_min_ms, s3queue_settings.s3queue_cleanup_interval_max_ms); + "Setting `cleanup_interval_min_ms` ({}) must be less or equal to `cleanup_interval_max_ms` ({})", + queue_settings.cleanup_interval_min_ms, queue_settings.cleanup_interval_max_ms); } + + if (!is_attach && !queue_settings.processing_threads_num.changed) + { + queue_settings.processing_threads_num = std::max(getNumberOfPhysicalCPUCores(), 16); + engine_args->settings->as()->changes.insertSetting( + "processing_threads_num", + queue_settings.processing_threads_num.value); + + LOG_TRACE(log, "Set `processing_threads_num` to {}", queue_settings.processing_threads_num); + } + } + + std::shared_ptr getQueueLog(const ObjectStoragePtr & storage, const ContextPtr & context, const ObjectStorageQueueSettings & table_settings) + { + const auto & settings = context->getSettingsRef(); + switch (storage->getType()) + { + case DB::ObjectStorageType::S3: + { + if (table_settings.enable_logging_to_queue_log || settings.s3queue_enable_logging_to_s3queue_log) + return context->getS3QueueLog(); + return nullptr; + } + case DB::ObjectStorageType::Azure: + { + if (table_settings.enable_logging_to_queue_log) + return context->getAzureQueueLog(); + return nullptr; + } + default: + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected object storage type: {}", storage->getType()); + } + } } -StorageS3Queue::StorageS3Queue( - std::unique_ptr s3queue_settings_, +StorageObjectStorageQueue::StorageObjectStorageQueue( + std::unique_ptr queue_settings_, const ConfigurationPtr configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, @@ -101,16 +129,16 @@ StorageS3Queue::StorageS3Queue( const String & comment, ContextPtr context_, std::optional format_settings_, - ASTStorage * /* engine_args */, + ASTStorage * engine_args, LoadingStrictnessLevel mode) : IStorage(table_id_) , WithContext(context_) - , s3queue_settings(std::move(s3queue_settings_)) - , zk_path(chooseZooKeeperPath(table_id_, context_->getSettingsRef(), *s3queue_settings)) + , queue_settings(std::move(queue_settings_)) + , zk_path(chooseZooKeeperPath(table_id_, context_->getSettingsRef(), *queue_settings)) , configuration{configuration_} , format_settings(format_settings_) - , reschedule_processing_interval_ms(s3queue_settings->s3queue_polling_min_timeout_ms) - , log(getLogger("StorageS3Queue (" + table_id_.getFullTableName() + ")")) + , reschedule_processing_interval_ms(queue_settings->polling_min_timeout_ms) + , log(getLogger(fmt::format("Storage{}Queue ({})", configuration->getEngineName(), table_id_.getFullTableName()))) { if (configuration->getPath().empty()) { @@ -122,18 +150,10 @@ StorageS3Queue::StorageS3Queue( } else if (!configuration->isPathWithGlobs()) { - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue url must either end with '/' or contain globs"); + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "ObjectStorageQueue url must either end with '/' or contain globs"); } - if (mode == LoadingStrictnessLevel::CREATE - && !context_->getSettingsRef().s3queue_allow_experimental_sharded_mode - && s3queue_settings->mode == S3QueueMode::ORDERED - && (s3queue_settings->s3queue_buckets > 1 || s3queue_settings->s3queue_processing_threads_num > 1)) - { - throw 
Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue sharded mode is not allowed. To enable use `s3queue_allow_experimental_sharded_mode`"); - } - - checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef()); + checkAndAdjustSettings(*queue_settings, engine_args, mode > LoadingStrictnessLevel::CREATE, log); object_storage = configuration->createObjectStorage(context_, /* is_readonly */true); FormatFactory::instance().checkFormatName(configuration->format); @@ -151,30 +171,30 @@ StorageS3Queue::StorageS3Queue( setInMemoryMetadata(storage_metadata); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); - task = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); }); + task = getContext()->getSchedulePool().createTask("ObjectStorageQueueStreamingTask", [this] { threadFunc(); }); - /// Get metadata manager from S3QueueMetadataFactory, + /// Get metadata manager from ObjectStorageQueueMetadataFactory, /// it will increase the ref count for the metadata object. - /// The ref count is decreased when StorageS3Queue::drop() method is called. - files_metadata = S3QueueMetadataFactory::instance().getOrCreate(zk_path, *s3queue_settings); + /// The ref count is decreased when StorageObjectStorageQueue::drop() method is called. + files_metadata = ObjectStorageQueueMetadataFactory::instance().getOrCreate(zk_path, *queue_settings); try { files_metadata->initialize(configuration_, storage_metadata); } catch (...) 
{ - S3QueueMetadataFactory::instance().remove(zk_path); + ObjectStorageQueueMetadataFactory::instance().remove(zk_path); throw; } } -void StorageS3Queue::startup() +void StorageObjectStorageQueue::startup() { if (task) task->activateAndSchedule(); } -void StorageS3Queue::shutdown(bool is_drop) +void StorageObjectStorageQueue::shutdown(bool is_drop) { table_is_being_dropped = is_drop; shutdown_called = true; @@ -193,31 +213,31 @@ void StorageS3Queue::shutdown(bool is_drop) LOG_TRACE(log, "Shut down storage"); } -void StorageS3Queue::drop() +void StorageObjectStorageQueue::drop() { - S3QueueMetadataFactory::instance().remove(zk_path); + ObjectStorageQueueMetadataFactory::instance().remove(zk_path); } -bool StorageS3Queue::supportsSubsetOfColumns(const ContextPtr & context_) const +bool StorageObjectStorageQueue::supportsSubsetOfColumns(const ContextPtr & context_) const { return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context_, format_settings); } -class ReadFromS3Queue : public SourceStepWithFilter +class ReadFromObjectStorageQueue : public SourceStepWithFilter { public: - std::string getName() const override { return "ReadFromS3Queue"; } + std::string getName() const override { return "ReadFromObjectStorageQueue"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; void applyFilters(ActionDAGNodes added_filter_nodes) override; - ReadFromS3Queue( + ReadFromObjectStorageQueue( const Names & column_names_, const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, const ContextPtr & context_, Block sample_block, ReadFromFormatInfo info_, - std::shared_ptr storage_, + std::shared_ptr storage_, size_t max_block_size_) : SourceStepWithFilter( DataStream{.header = std::move(sample_block)}, @@ -233,15 +253,15 @@ public: private: ReadFromFormatInfo info; - std::shared_ptr storage; + std::shared_ptr storage; size_t max_block_size; - std::shared_ptr 
iterator; + std::shared_ptr iterator; void createIterator(const ActionsDAG::Node * predicate); }; -void ReadFromS3Queue::createIterator(const ActionsDAG::Node * predicate) +void ReadFromObjectStorageQueue::createIterator(const ActionsDAG::Node * predicate) { if (iterator) return; @@ -250,7 +270,7 @@ void ReadFromS3Queue::createIterator(const ActionsDAG::Node * predicate) } -void ReadFromS3Queue::applyFilters(ActionDAGNodes added_filter_nodes) +void ReadFromObjectStorageQueue::applyFilters(ActionDAGNodes added_filter_nodes) { SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); @@ -261,7 +281,7 @@ void ReadFromS3Queue::applyFilters(ActionDAGNodes added_filter_nodes) createIterator(predicate); } -void StorageS3Queue::read( +void StorageObjectStorageQueue::read( QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, @@ -283,10 +303,10 @@ void StorageS3Queue::read( "Cannot read from {} with attached materialized views", getName()); } - auto this_ptr = std::static_pointer_cast(shared_from_this()); + auto this_ptr = std::static_pointer_cast(shared_from_this()); auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); - auto reading = std::make_unique( + auto reading = std::make_unique( column_names, query_info, storage_snapshot, @@ -299,18 +319,20 @@ void StorageS3Queue::read( query_plan.addStep(std::move(reading)); } -void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +void ReadFromObjectStorageQueue::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { Pipes pipes; - const size_t adjusted_num_streams = storage->s3queue_settings->s3queue_processing_threads_num; + const size_t adjusted_num_streams = storage->queue_settings->processing_threads_num; createIterator(nullptr); for (size_t i = 0; i < adjusted_num_streams; ++i) 
pipes.emplace_back(storage->createSource( - i, + i/* processor_id */, info, iterator, - max_block_size, context)); + max_block_size, + context, + true/* commit_once_processed */)); auto pipe = Pipe::unitePipes(std::move(pipes)); if (pipe.empty()) @@ -322,12 +344,13 @@ void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const pipeline.init(std::move(pipe)); } -std::shared_ptr StorageS3Queue::createSource( +std::shared_ptr StorageObjectStorageQueue::createSource( size_t processor_id, const ReadFromFormatInfo & info, - std::shared_ptr file_iterator, + std::shared_ptr file_iterator, size_t max_block_size, - ContextPtr local_context) + ContextPtr local_context, + bool commit_once_processed) { auto internal_source = std::make_unique( getName(), @@ -345,25 +368,30 @@ std::shared_ptr StorageS3Queue::createSource( { object_storage->removeObject(StoredObject(path)); }; - auto s3_queue_log = s3queue_settings->s3queue_enable_logging_to_s3queue_log ? local_context->getS3QueueLog() : nullptr; - return std::make_shared( + + return std::make_shared( getName(), processor_id, info.source_header, std::move(internal_source), files_metadata, - s3queue_settings->after_processing, + queue_settings->after_processing, file_deleter, info.requested_virtual_columns, local_context, shutdown_called, table_is_being_dropped, - s3_queue_log, + getQueueLog(object_storage, local_context, *queue_settings), getStorageID(), - log); + log, + queue_settings->max_processed_files_before_commit, + queue_settings->max_processed_rows_before_commit, + queue_settings->max_processed_bytes_before_commit, + queue_settings->max_processing_time_sec_before_commit, + commit_once_processed); } -bool StorageS3Queue::hasDependencies(const StorageID & table_id) +bool StorageObjectStorageQueue::hasDependencies(const StorageID & table_id) { // Check if all dependencies are attached auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); @@ -388,7 +416,7 @@ bool 
StorageS3Queue::hasDependencies(const StorageID & table_id) return true; } -void StorageS3Queue::threadFunc() +void StorageObjectStorageQueue::threadFunc() { if (shutdown_called) return; @@ -406,12 +434,12 @@ void StorageS3Queue::threadFunc() if (streamToViews()) { /// Reset the reschedule interval. - reschedule_processing_interval_ms = s3queue_settings->s3queue_polling_min_timeout_ms; + reschedule_processing_interval_ms = queue_settings->polling_min_timeout_ms; } else { /// Increase the reschedule interval. - reschedule_processing_interval_ms += s3queue_settings->s3queue_polling_backoff_ms; + reschedule_processing_interval_ms += queue_settings->polling_backoff_ms; } LOG_DEBUG(log, "Stopped streaming to {} attached views", dependencies_count); @@ -428,63 +456,98 @@ void StorageS3Queue::threadFunc() if (!shutdown_called) { - LOG_TRACE(log, "Reschedule S3 Queue processing thread in {} ms", reschedule_processing_interval_ms); + LOG_TRACE(log, "Reschedule processing thread in {} ms", reschedule_processing_interval_ms); task->scheduleAfter(reschedule_processing_interval_ms); } } -bool StorageS3Queue::streamToViews() +bool StorageObjectStorageQueue::streamToViews() { + // Create a stream for each consumer and join them in a union stream + // Only insert into dependent views and expect that input blocks contain virtual columns + auto table_id = getStorageID(); auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); if (!table) throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); - auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); - - // Create an INSERT query for streaming data auto insert = std::make_shared(); insert->table_id = table_id; - auto s3queue_context = Context::createCopy(getContext()); - s3queue_context->makeQueryContext(); + auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); + auto queue_context = 
Context::createCopy(getContext()); + queue_context->makeQueryContext(); - // Create a stream for each consumer and join them in a union stream - // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true); - auto block_io = interpreter.execute(); - auto file_iterator = createFileIterator(s3queue_context, nullptr); + auto file_iterator = createFileIterator(queue_context, nullptr); + size_t total_rows = 0; - auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context)); - - Pipes pipes; - pipes.reserve(s3queue_settings->s3queue_processing_threads_num); - for (size_t i = 0; i < s3queue_settings->s3queue_processing_threads_num; ++i) + while (!shutdown_called && !file_iterator->isFinished()) { - auto source = createSource(i, read_from_format_info, file_iterator, DBMS_DEFAULT_BUFFER_SIZE, s3queue_context); - pipes.emplace_back(std::move(source)); + InterpreterInsertQuery interpreter(insert, queue_context, false, true, true); + auto block_io = interpreter.execute(); + auto read_from_format_info = prepareReadingFromFormat( + block_io.pipeline.getHeader().getNames(), + storage_snapshot, + supportsSubsetOfColumns(queue_context)); + + Pipes pipes; + std::vector> sources; + + pipes.reserve(queue_settings->processing_threads_num); + sources.reserve(queue_settings->processing_threads_num); + + for (size_t i = 0; i < queue_settings->processing_threads_num; ++i) + { + auto source = createSource( + i/* processor_id */, + read_from_format_info, + file_iterator, + DBMS_DEFAULT_BUFFER_SIZE, + queue_context, + false/* commit_once_processed */); + + pipes.emplace_back(source); + sources.emplace_back(source); + } + auto pipe = Pipe::unitePipes(std::move(pipes)); + + block_io.pipeline.complete(std::move(pipe)); + block_io.pipeline.setNumThreads(queue_settings->processing_threads_num); + 
block_io.pipeline.setConcurrencyControl(queue_context->getSettingsRef().use_concurrency_control); + + std::atomic_size_t rows = 0; + block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); + + try + { + CompletedPipelineExecutor executor(block_io.pipeline); + executor.execute(); + } + catch (...) + { + for (auto & source : sources) + source->commit(/* success */false, getCurrentExceptionMessage(true)); + + file_iterator->releaseFinishedBuckets(); + throw; + } + + for (auto & source : sources) + source->commit(/* success */true); + + file_iterator->releaseFinishedBuckets(); + total_rows += rows; } - auto pipe = Pipe::unitePipes(std::move(pipes)); - block_io.pipeline.complete(std::move(pipe)); - block_io.pipeline.setNumThreads(s3queue_settings->s3queue_processing_threads_num); - block_io.pipeline.setConcurrencyControl(s3queue_context->getSettingsRef().use_concurrency_control); - - std::atomic_size_t rows = 0; - block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); - - CompletedPipelineExecutor executor(block_io.pipeline); - executor.execute(); - - return rows > 0; + return total_rows > 0; } -zkutil::ZooKeeperPtr StorageS3Queue::getZooKeeper() const +zkutil::ZooKeeperPtr StorageObjectStorageQueue::getZooKeeper() const { return getContext()->getZooKeeper(); } -std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) +std::shared_ptr StorageObjectStorageQueue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) { auto settings = configuration->getQuerySettings(local_context); auto glob_iterator = std::make_unique( @@ -493,73 +556,4 @@ std::shared_ptr StorageS3Queue::createFileIterator return std::make_shared(files_metadata, std::move(glob_iterator), shutdown_called, log); } -#if USE_AWS_S3 -void registerStorageS3Queue(StorageFactory & factory) -{ - factory.registerStorage( - 
"S3Queue", - [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = std::make_shared(); - StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getContext(), false); - - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. - std::optional format_settings; - - auto s3queue_settings = std::make_unique(); - if (args.storage_def->settings) - { - s3queue_settings->loadFromQuery(*args.storage_def); - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - else - LOG_TRACE(getLogger("StorageS3"), "Remove: {}", change.name); - args.storage_def->settings->changes.removeSetting(change.name); - } - - for (const auto & change : args.storage_def->settings->changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.applyChange(change); - } - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - return std::make_shared( - std::move(s3queue_settings), - std::move(configuration), - args.table_id, - args.columns, - args.constraints, - args.comment, - args.getContext(), - format_settings, - args.storage_def, - args.mode); - }, - { - .supports_settings = true, - .supports_schema_inference = true, - .source_access_type = AccessType::S3, - }); -} -#endif - } diff --git a/src/Storages/S3Queue/StorageS3Queue.h 
b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h similarity index 73% rename from src/Storages/S3Queue/StorageS3Queue.h rename to src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h index ef83a1ccc25..758721674fe 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h @@ -5,25 +5,24 @@ #include #include #include -#include -#include +#include +#include #include #include -#include #include namespace DB { -class S3QueueMetadata; +class ObjectStorageQueueMetadata; -class StorageS3Queue : public IStorage, WithContext +class StorageObjectStorageQueue : public IStorage, WithContext { public: using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; - StorageS3Queue( - std::unique_ptr s3queue_settings_, + StorageObjectStorageQueue( + std::unique_ptr queue_settings_, ConfigurationPtr configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, @@ -34,7 +33,7 @@ public: ASTStorage * engine_args, LoadingStrictnessLevel mode); - String getName() const override { return "S3Queue"; } + String getName() const override { return "ObjectStorageQueue"; } void read( QueryPlan & query_plan, @@ -53,13 +52,13 @@ public: zkutil::ZooKeeperPtr getZooKeeper() const; private: - friend class ReadFromS3Queue; - using FileIterator = StorageS3QueueSource::FileIterator; + friend class ReadFromObjectStorageQueue; + using FileIterator = ObjectStorageQueueSource::FileIterator; - const std::unique_ptr s3queue_settings; + const std::unique_ptr queue_settings; const fs::path zk_path; - std::shared_ptr files_metadata; + std::shared_ptr files_metadata; ConfigurationPtr configuration; ObjectStoragePtr object_storage; @@ -83,12 +82,13 @@ private: bool supportsDynamicSubcolumns() const override { return true; } std::shared_ptr createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate); - std::shared_ptr createSource( + std::shared_ptr createSource( size_t processor_id, const 
ReadFromFormatInfo & info, - std::shared_ptr file_iterator, + std::shared_ptr file_iterator, size_t max_block_size, - ContextPtr local_context); + ContextPtr local_context, + bool commit_once_processed); bool hasDependencies(const StorageID & table_id); bool streamToViews(); diff --git a/src/Storages/ObjectStorageQueue/registerQueueStorage.cpp b/src/Storages/ObjectStorageQueue/registerQueueStorage.cpp new file mode 100644 index 00000000000..20968143627 --- /dev/null +++ b/src/Storages/ObjectStorageQueue/registerQueueStorage.cpp @@ -0,0 +1,115 @@ +#include "config.h" + +#include +#include +#include +#include + +#if USE_AWS_S3 +#include +#include +#endif + +#if USE_AZURE_BLOB_STORAGE +#include +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +template +StoragePtr createQueueStorage(const StorageFactory::Arguments & args) +{ + auto & engine_args = args.engine_args; + if (engine_args.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); + + auto configuration = std::make_shared(); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getContext(), false); + + // Use format settings from global server context + settings from + // the SETTINGS clause of the create query. Settings from current + // session and user are ignored. + std::optional format_settings; + + auto queue_settings = std::make_unique(); + if (args.storage_def->settings) + { + queue_settings->loadFromQuery(*args.storage_def); + FormatFactorySettings user_format_settings; + + // Apply changed settings from global context, but ignore the + // unknown ones, because we only have the format settings here. 
+ const auto & changes = args.getContext()->getSettingsRef().changes(); + for (const auto & change : changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.set(change.name, change.value); + + args.storage_def->settings->changes.removeSetting(change.name); + } + + for (const auto & change : args.storage_def->settings->changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.applyChange(change); + } + format_settings = getFormatSettings(args.getContext(), user_format_settings); + } + else + { + format_settings = getFormatSettings(args.getContext()); + } + + return std::make_shared( + std::move(queue_settings), + std::move(configuration), + args.table_id, + args.columns, + args.constraints, + args.comment, + args.getContext(), + format_settings, + args.storage_def, + args.mode); +} + +#if USE_AWS_S3 +void registerStorageS3Queue(StorageFactory & factory) +{ + factory.registerStorage( + "S3Queue", + [](const StorageFactory::Arguments & args) + { + return createQueueStorage(args); + }, + { + .supports_settings = true, + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} +#endif + +#if USE_AZURE_BLOB_STORAGE +void registerStorageAzureQueue(StorageFactory & factory) +{ + factory.registerStorage( + "AzureQueue", + [](const StorageFactory::Arguments & args) + { + return createQueueStorage(args); + }, + { + .supports_settings = true, + .supports_schema_inference = true, + .source_access_type = AccessType::AZURE, + }); +} +#endif +} diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 0bcbedee41a..9654b4ef37a 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -16,7 +16,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -64,7 +65,6 @@ ProjectionDescription ProjectionDescription::clone() const other.sample_block_for_keys = sample_block_for_keys; other.metadata 
= metadata; other.key_size = key_size; - other.is_minmax_count_projection = is_minmax_count_projection; other.primary_key_max_column_name = primary_key_max_column_name; other.partition_value_indices = partition_value_indices; @@ -195,7 +195,6 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( ContextPtr query_context) { ProjectionDescription result; - result.is_minmax_count_projection = true; auto select_query = std::make_shared(); ASTPtr select_expression_list = std::make_shared(); @@ -282,13 +281,11 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( return result; } - void ProjectionDescription::recalculateWithNewColumns(const ColumnsDescription & new_columns, ContextPtr query_context) { *this = getProjectionFromAST(definition_ast, new_columns, query_context); } - Block ProjectionDescription::calculate(const Block & block, ContextPtr context) const { auto mut_context = Context::createCopy(context); @@ -310,7 +307,9 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) builder.resize(1); // Generate aggregated blocks with rows less or equal than the original block. // There should be only one output block after this transformation. 
- builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); + + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); PullingPipelineExecutor executor(pipeline); diff --git a/src/Storages/ProjectionsDescription.h b/src/Storages/ProjectionsDescription.h index 75a97697e00..5f091b4421b 100644 --- a/src/Storages/ProjectionsDescription.h +++ b/src/Storages/ProjectionsDescription.h @@ -56,8 +56,6 @@ struct ProjectionDescription size_t key_size = 0; - bool is_minmax_count_projection = false; - /// If a primary key expression is used in the minmax_count projection, store the name of max expression. String primary_key_max_column_name; diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 0baa234e7a3..90792c59d38 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -1,11 +1,8 @@ -#include #include #include #include #include #include -#include -#include #include #include #include @@ -18,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +24,7 @@ #include #include #include +#include namespace DB @@ -34,6 +33,7 @@ namespace DB namespace ErrorCodes { extern const int ROCKSDB_ERROR; + extern const int LOGICAL_ERROR; } static const IColumn::Permutation & getAscendingPermutation(const IColumn & column, IColumn::Permutation & perm) @@ -155,6 +155,7 @@ std::vector EmbeddedRocksDBBulkSink::squash(Chunk chunk) return {}; } +template std::pair EmbeddedRocksDBBulkSink::serializeChunks(std::vector && input_chunks) const { auto serialized_key_column = ColumnString::create(); @@ -167,15 +168,41 @@ std::pair EmbeddedRocksDBBulkSink::seriali auto & serialized_value_offsets = serialized_value_column->getOffsets(); WriteBufferFromVector 
writer_key(serialized_key_data); WriteBufferFromVector writer_value(serialized_value_data); + FormatSettings format_settings; /// Format settings is 1.5KB, so it's not wise to create it for each row + + /// TTL handling + [[maybe_unused]] auto get_rocksdb_ts = [this](String & ts_string) + { + Int64 curtime = -1; + auto * system_clock = storage.rocksdb_ptr->GetEnv()->GetSystemClock().get(); + rocksdb::Status st = system_clock->GetCurrentTime(&curtime); + if (!st.ok()) + throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB error: {}", st.ToString()); + WriteBufferFromString buf(ts_string); + writeBinaryLittleEndian(static_cast(curtime), buf); + }; for (auto && chunk : input_chunks) { + [[maybe_unused]] String ts_string; + if constexpr (with_timestamp) + get_rocksdb_ts(ts_string); + const auto & columns = chunk.getColumns(); auto rows = chunk.getNumRows(); for (size_t i = 0; i < rows; ++i) { for (size_t idx = 0; idx < columns.size(); ++idx) - serializations[idx]->serializeBinary(*columns[idx], i, idx == primary_key_pos ? writer_key : writer_value, {}); + serializations[idx]->serializeBinary(*columns[idx], i, idx == primary_key_pos ? writer_key : writer_value, format_settings); + + /// Append timestamp to end of value, see rocksdb::DBWithTTLImpl::AppendTS + if constexpr (with_timestamp) + { + if (ts_string.size() != sizeof(Int32)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid timestamp size: expect 4, got {}", ts_string.size()); + writeString(ts_string, writer_value); + } + /// String in ColumnString must be null-terminated writeChar('\0', writer_key); writeChar('\0', writer_value); @@ -198,7 +225,8 @@ void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) if (chunks_to_write.empty()) return; - auto [serialized_key_column, serialized_value_column] = serializeChunks(std::move(chunks_to_write)); + auto [serialized_key_column, serialized_value_column] + = storage.ttl > 0 ? 
serializeChunks(std::move(chunks_to_write)) : serializeChunks(std::move(chunks_to_write)); auto sst_file_path = getTemporarySSTFilePath(); LOG_DEBUG(getLogger("EmbeddedRocksDBBulkSink"), "Writing {} rows to SST file {}", serialized_key_column->size(), sst_file_path); if (auto status = buildSSTFile(sst_file_path, *serialized_key_column, *serialized_value_column); !status.ok()) diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h index 46193b152ca..1f548e7813d 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -1,7 +1,5 @@ #pragma once -#include -#include #include #include #include @@ -49,6 +47,7 @@ private: bool isEnoughSize(const std::vector & input_chunks) const; bool isEnoughSize(const Chunk & chunk) const; /// Serialize chunks to rocksdb key-value pairs + template std::pair serializeChunks(std::vector && input_chunks) const; StorageEmbeddedRocksDB & storage; diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index c3b7ae64c7e..b9d3e071b6c 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -42,6 +43,7 @@ #include #include +#include #include @@ -185,11 +187,11 @@ StorageEmbeddedRocksDB::StorageEmbeddedRocksDB(const StorageID & table_id_, bool read_only_) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) + , log(getLogger(fmt::format("StorageEmbeddedRocksDB ({})", getStorageID().getNameForLogs()))) , primary_key{primary_key_} , rocksdb_dir(std::move(rocksdb_dir_)) , ttl(ttl_) , read_only(read_only_) - , log(getLogger(fmt::format("StorageEmbeddedRocksDB ({})", getStorageID().getNameForLogs()))) { setInMemoryMetadata(metadata_); setSettings(std::move(settings_)); @@ -352,6 +354,72 @@ bool StorageEmbeddedRocksDB::optimize( return true; } 
+static_assert(rocksdb::DEBUG_LEVEL == 0); +static_assert(rocksdb::HEADER_LEVEL == 5); +static constexpr std::array, 6> rocksdb_logger_map = { + std::make_pair(DB::LogsLevel::debug, Poco::Message::Priority::PRIO_DEBUG), + std::make_pair(DB::LogsLevel::information, Poco::Message::Priority::PRIO_INFORMATION), + std::make_pair(DB::LogsLevel::warning, Poco::Message::Priority::PRIO_WARNING), + std::make_pair(DB::LogsLevel::error, Poco::Message::Priority::PRIO_ERROR), + std::make_pair(DB::LogsLevel::fatal, Poco::Message::Priority::PRIO_FATAL), + /// Same as default logger does for HEADER_LEVEL + std::make_pair(DB::LogsLevel::information, Poco::Message::Priority::PRIO_INFORMATION), +}; +class StorageEmbeddedRocksDBLogger : public rocksdb::Logger +{ +public: + explicit StorageEmbeddedRocksDBLogger(const rocksdb::InfoLogLevel log_level, LoggerRawPtr log_) + : rocksdb::Logger(log_level) + , log(log_) + {} + + void Logv(const char * format, va_list ap) override + __attribute__((format(printf, 2, 0))) + { + Logv(rocksdb::InfoLogLevel::DEBUG_LEVEL, format, ap); + } + + void Logv(const rocksdb::InfoLogLevel log_level, const char * format, va_list ap) override + __attribute__((format(printf, 3, 0))) + { + if (log_level < GetInfoLogLevel()) + return; + + auto level = rocksdb_logger_map[log_level]; + + /// stack buffer was enough + { + va_list backup_ap; + va_copy(backup_ap, ap); + std::array stack; + if (vsnprintf(stack.data(), stack.size(), format, backup_ap) < static_cast(stack.size())) + { + va_end(backup_ap); + LOG_IMPL(log, level.first, level.second, "{}", stack.data()); + return; + } + va_end(backup_ap); + } + + /// let's try with a bigger dynamic buffer (but not too huge, since + /// some of rocksdb internal code has also such a limitation, i..e + /// HdfsLogger) + { + va_list backup_ap; + va_copy(backup_ap, ap); + static constexpr int buffer_size = 30000; + std::unique_ptr buffer(new char[buffer_size]); + if (vsnprintf(buffer.get(), buffer_size, format, backup_ap) >= 
buffer_size) + buffer[buffer_size - 1] = 0; + va_end(backup_ap); + LOG_IMPL(log, level.first, level.second, "{}", buffer.get()); + } + } + +private: + LoggerRawPtr log; +}; + void StorageEmbeddedRocksDB::initDB() { rocksdb::Status status; @@ -448,6 +516,7 @@ void StorageEmbeddedRocksDB::initDB() } } + merged.info_log = std::make_shared(merged.info_log_level, log.get()); merged.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options)); if (ttl > 0) diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 61592398954..a6aa1ba36a4 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -114,17 +114,19 @@ public: private: SinkToStoragePtr getSink(ContextPtr context, const StorageMetadataPtr & metadata_snapshot); + LoggerPtr log; + MultiVersion storage_settings; const String primary_key; + using RocksDBPtr = std::unique_ptr; RocksDBPtr rocksdb_ptr; + mutable SharedMutex rocksdb_ptr_mx; String rocksdb_dir; Int32 ttl; bool read_only; void initDB(); - - LoggerPtr log; }; } diff --git a/src/Storages/S3Queue/S3QueueMetadataFactory.h b/src/Storages/S3Queue/S3QueueMetadataFactory.h deleted file mode 100644 index 80e96f8aa7e..00000000000 --- a/src/Storages/S3Queue/S3QueueMetadataFactory.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once -#include -#include -#include - -namespace DB -{ - -class S3QueueMetadataFactory final : private boost::noncopyable -{ -public: - using FilesMetadataPtr = std::shared_ptr; - - static S3QueueMetadataFactory & instance(); - - FilesMetadataPtr getOrCreate(const std::string & zookeeper_path, const S3QueueSettings & settings); - - void remove(const std::string & zookeeper_path); - - std::unordered_map getAll(); - -private: - struct Metadata - { - explicit Metadata(std::shared_ptr metadata_) : metadata(metadata_), ref_count(1) {} - - std::shared_ptr metadata; - /// TODO: the ref count should be kept in keeper, because of the case 
with distributed processing. - size_t ref_count = 0; - }; - using MetadataByPath = std::unordered_map; - - MetadataByPath metadata_by_path; - std::mutex mutex; -}; - -} diff --git a/src/Storages/S3Queue/S3QueueSettings.h b/src/Storages/S3Queue/S3QueueSettings.h deleted file mode 100644 index c486a7fbb5d..00000000000 --- a/src/Storages/S3Queue/S3QueueSettings.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include -#include -#include - - -namespace DB -{ -class ASTStorage; - - -#define S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ - M(S3QueueMode, \ - mode, \ - S3QueueMode::UNORDERED, \ - "With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKepeer." \ - "With ordered mode, only the max name of the successfully consumed file stored.", \ - 0) \ - M(S3QueueAction, after_processing, S3QueueAction::KEEP, "Delete or keep file in S3 after successful processing", 0) \ - M(String, keeper_path, "", "Zookeeper node path", 0) \ - M(UInt32, s3queue_loading_retries, 0, "Retry loading up to specified number of times", 0) \ - M(UInt32, s3queue_processing_threads_num, 1, "Number of processing threads", 0) \ - M(UInt32, s3queue_enable_logging_to_s3queue_log, 1, "Enable logging to system table system.s3queue_log", 0) \ - M(String, s3queue_last_processed_path, "", "For Ordered mode. Files that have lexicographically smaller file name are considered already processed", 0) \ - M(UInt32, s3queue_tracked_file_ttl_sec, 0, "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", 0) \ - M(UInt32, s3queue_polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \ - M(UInt32, s3queue_polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \ - M(UInt32, s3queue_polling_backoff_ms, 1000, "Polling backoff", 0) \ - M(UInt32, s3queue_tracked_files_limit, 1000, "For unordered mode. 
Max set size for tracking processed files in ZooKeeper", 0) \ - M(UInt32, s3queue_cleanup_interval_min_ms, 60000, "For unordered mode. Polling backoff min for cleanup", 0) \ - M(UInt32, s3queue_cleanup_interval_max_ms, 60000, "For unordered mode. Polling backoff max for cleanup", 0) \ - M(UInt32, s3queue_buckets, 0, "Number of buckets for Ordered mode parallel processing", 0) \ - -#define LIST_OF_S3QUEUE_SETTINGS(M, ALIAS) \ - S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ - LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) - -DECLARE_SETTINGS_TRAITS(S3QueueSettingsTraits, LIST_OF_S3QUEUE_SETTINGS) - - -struct S3QueueSettings : public BaseSettings -{ - void loadFromQuery(ASTStorage & storage_def); -}; - -} diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp deleted file mode 100644 index d8633037ed9..00000000000 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ /dev/null @@ -1,514 +0,0 @@ -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace CurrentMetrics -{ - extern const Metric StorageS3Threads; - extern const Metric StorageS3ThreadsActive; -} - -namespace ProfileEvents -{ - extern const Event S3QueuePullMicroseconds; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int S3_ERROR; - extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; -} - -StorageS3QueueSource::S3QueueObjectInfo::S3QueueObjectInfo( - const ObjectInfo & object_info, - Metadata::FileMetadataPtr processing_holder_) - : ObjectInfo(object_info.relative_path, object_info.metadata) - , processing_holder(processing_holder_) -{ -} - -StorageS3QueueSource::FileIterator::FileIterator( - std::shared_ptr metadata_, - std::unique_ptr glob_iterator_, - std::atomic & shutdown_called_, - LoggerPtr logger_) - : StorageObjectStorageSource::IIterator("S3QueueIterator") - , metadata(metadata_) - , glob_iterator(std::move(glob_iterator_)) - , shutdown_called(shutdown_called_) - , log(logger_) -{ -} - 
-size_t StorageS3QueueSource::FileIterator::estimatedKeysCount() -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method estimateKeysCount is not implemented"); -} - -StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::nextImpl(size_t processor) -{ - ObjectInfoPtr object_info; - S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info; - - while (!shutdown_called) - { - if (metadata->useBucketsForProcessing()) - std::tie(object_info, bucket_info) = getNextKeyFromAcquiredBucket(processor); - else - object_info = glob_iterator->next(processor); - - if (!object_info) - return {}; - - if (shutdown_called) - { - LOG_TEST(log, "Shutdown was called, stopping file iterator"); - return {}; - } - - auto file_metadata = metadata->getFileMetadata(object_info->relative_path, bucket_info); - if (file_metadata->setProcessing()) - return std::make_shared(*object_info, file_metadata); - } - return {}; -} - -std::pair -StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processor) -{ - /// We need this lock to maintain consistency between listing s3 directory - /// and getting/putting result into listed_keys_cache. - std::lock_guard lock(buckets_mutex); - - auto bucket_holder_it = bucket_holders.emplace(processor, nullptr).first; - auto current_processor = toString(processor); - - LOG_TEST( - log, "Current processor: {}, acquired bucket: {}", - processor, bucket_holder_it->second ? toString(bucket_holder_it->second->getBucket()) : "None"); - - while (true) - { - /// Each processing thread gets next path from glob_iterator->next() - /// and checks if corresponding bucket is already acquired by someone. - /// In case it is already acquired, they put the key into listed_keys_cache, - /// so that the thread who acquired the bucket will be able to see - /// those keys without the need to list s3 directory once again. 
- if (bucket_holder_it->second) - { - const auto bucket = bucket_holder_it->second->getBucket(); - auto it = listed_keys_cache.find(bucket); - if (it != listed_keys_cache.end()) - { - /// `bucket_keys` -- keys we iterated so far and which were not taken for processing. - /// `bucket_processor` -- processor id of the thread which has acquired the bucket. - auto & [bucket_keys, bucket_processor] = it->second; - - /// Check correctness just in case. - if (!bucket_processor.has_value()) - { - bucket_processor = current_processor; - } - else if (bucket_processor.value() != current_processor) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Expected current processor {} to be equal to {} for bucket {}", - current_processor, - bucket_processor.has_value() ? toString(bucket_processor.value()) : "None", - bucket); - } - - /// Take next key to process - if (!bucket_keys.empty()) - { - /// Take the key from the front, the order is important. - auto object_info = bucket_keys.front(); - bucket_keys.pop_front(); - - LOG_TEST(log, "Current bucket: {}, will process file: {}", - bucket, object_info->getFileName()); - - return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; - } - - LOG_TEST(log, "Cache of bucket {} is empty", bucket); - - /// No more keys in bucket, remove it from cache. - listed_keys_cache.erase(it); - } - else - { - LOG_TEST(log, "Cache of bucket {} is empty", bucket); - } - - if (iterator_finished) - { - /// Bucket is fully processed - release the bucket. - bucket_holder_it->second->release(); - bucket_holder_it->second.reset(); - } - } - /// If processing thread has already acquired some bucket - /// and while listing s3 directory gets a key which is in a different bucket, - /// it puts the key into listed_keys_cache to allow others to process it, - /// because one processing thread can acquire only one bucket at a time. 
- /// Once a thread is finished with its acquired bucket, it checks listed_keys_cache - /// to see if there are keys from buckets not acquired by anyone. - if (!bucket_holder_it->second) - { - for (auto it = listed_keys_cache.begin(); it != listed_keys_cache.end();) - { - auto & [bucket, bucket_info] = *it; - auto & [bucket_keys, bucket_processor] = bucket_info; - - LOG_TEST(log, "Bucket: {}, cached keys: {}, processor: {}", - bucket, bucket_keys.size(), bucket_processor.has_value() ? toString(bucket_processor.value()) : "None"); - - if (bucket_processor.has_value()) - { - LOG_TEST(log, "Bucket {} is already locked for processing by {} (keys: {})", - bucket, bucket_processor.value(), bucket_keys.size()); - ++it; - continue; - } - - if (bucket_keys.empty()) - { - /// No more keys in bucket, remove it from cache. - /// We still might add new keys to this bucket if !iterator_finished. - it = listed_keys_cache.erase(it); - continue; - } - - bucket_holder_it->second = metadata->tryAcquireBucket(bucket, current_processor); - if (!bucket_holder_it->second) - { - LOG_TEST(log, "Bucket {} is already locked for processing (keys: {})", - bucket, bucket_keys.size()); - ++it; - continue; - } - - bucket_processor = current_processor; - - /// Take the key from the front, the order is important. 
- auto object_info = bucket_keys.front(); - bucket_keys.pop_front(); - - LOG_TEST(log, "Acquired bucket: {}, will process file: {}", - bucket, object_info->getFileName()); - - return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; - } - } - - if (iterator_finished) - { - LOG_TEST(log, "Reached the end of file iterator and nothing left in keys cache"); - return {}; - } - - auto object_info = glob_iterator->next(processor); - if (object_info) - { - const auto bucket = metadata->getBucketForPath(object_info->relative_path); - auto & bucket_cache = listed_keys_cache[bucket]; - - LOG_TEST(log, "Found next file: {}, bucket: {}, current bucket: {}, cached_keys: {}", - object_info->getFileName(), bucket, - bucket_holder_it->second ? toString(bucket_holder_it->second->getBucket()) : "None", - bucket_cache.keys.size()); - - if (bucket_holder_it->second) - { - if (bucket_holder_it->second->getBucket() != bucket) - { - /// Acquired bucket differs from object's bucket, - /// put it into bucket's cache and continue. - bucket_cache.keys.emplace_back(object_info); - continue; - } - /// Bucket is already acquired, process the file. - return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; - } - else - { - bucket_holder_it->second = metadata->tryAcquireBucket(bucket, current_processor); - if (bucket_holder_it->second) - { - bucket_cache.processor = current_processor; - if (!bucket_cache.keys.empty()) - { - /// We have to maintain ordering between keys, - /// so if some keys are already in cache - start with them. 
- bucket_cache.keys.emplace_back(object_info); - object_info = bucket_cache.keys.front(); - bucket_cache.keys.pop_front(); - } - return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; - } - else - { - LOG_TEST(log, "Bucket {} is already locked for processing", bucket); - bucket_cache.keys.emplace_back(object_info); - continue; - } - } - } - else - { - if (bucket_holder_it->second) - { - bucket_holder_it->second->release(); - bucket_holder_it->second.reset(); - } - - LOG_TEST(log, "Reached the end of file iterator"); - iterator_finished = true; - - if (listed_keys_cache.empty()) - return {}; - else - continue; - } - } -} - -StorageS3QueueSource::StorageS3QueueSource( - String name_, - size_t processor_id_, - const Block & header_, - std::unique_ptr internal_source_, - std::shared_ptr files_metadata_, - const S3QueueAction & action_, - RemoveFileFunc remove_file_func_, - const NamesAndTypesList & requested_virtual_columns_, - ContextPtr context_, - const std::atomic & shutdown_called_, - const std::atomic & table_is_being_dropped_, - std::shared_ptr s3_queue_log_, - const StorageID & storage_id_, - LoggerPtr log_) - : ISource(header_) - , WithContext(context_) - , name(std::move(name_)) - , processor_id(processor_id_) - , action(action_) - , files_metadata(files_metadata_) - , internal_source(std::move(internal_source_)) - , requested_virtual_columns(requested_virtual_columns_) - , shutdown_called(shutdown_called_) - , table_is_being_dropped(table_is_being_dropped_) - , s3_queue_log(s3_queue_log_) - , storage_id(storage_id_) - , remove_file_func(remove_file_func_) - , log(log_) -{ -} - -String StorageS3QueueSource::getName() const -{ - return name; -} - -void StorageS3QueueSource::lazyInitialize(size_t processor) -{ - if (initialized) - return; - - internal_source->lazyInitialize(processor); - reader = std::move(internal_source->reader); - if (reader) - reader_future = std::move(internal_source->reader_future); - initialized = true; -} - -Chunk 
StorageS3QueueSource::generate() -{ - lazyInitialize(processor_id); - - while (true) - { - if (!reader) - break; - - const auto * object_info = dynamic_cast(&reader.getObjectInfo()); - auto file_metadata = object_info->processing_holder; - auto file_status = file_metadata->getFileStatus(); - - if (isCancelled()) - { - reader->cancel(); - - if (processed_rows_from_file) - { - try - { - file_metadata->setFailed("Cancelled"); - } - catch (...) - { - LOG_ERROR(log, "Failed to set file {} as failed: {}", - object_info->relative_path, getCurrentExceptionMessage(true)); - } - - appendLogElement(reader.getObjectInfo().getPath(), *file_status, processed_rows_from_file, false); - } - - break; - } - - const auto & path = reader.getObjectInfo().getPath(); - - if (shutdown_called) - { - if (processed_rows_from_file == 0) - break; - - if (table_is_being_dropped) - { - LOG_DEBUG( - log, "Table is being dropped, {} rows are already processed from {}, but file is not fully processed", - processed_rows_from_file, path); - - try - { - file_metadata->setFailed("Table is dropped"); - } - catch (...) - { - LOG_ERROR(log, "Failed to set file {} as failed: {}", - object_info->relative_path, getCurrentExceptionMessage(true)); - } - - appendLogElement(path, *file_status, processed_rows_from_file, false); - - /// Leave the file half processed. Table is being dropped, so we do not care. - break; - } - - LOG_DEBUG(log, "Shutdown called, but file {} is partially processed ({} rows). " - "Will process the file fully and then shutdown", - path, processed_rows_from_file); - } - - auto * prev_scope = CurrentThread::get().attachProfileCountersScope(&file_status->profile_counters); - SCOPE_EXIT({ CurrentThread::get().attachProfileCountersScope(prev_scope); }); - /// FIXME: if files are compressed, profile counters update does not work fully (s3 related counters are not saved). Why? 
- - try - { - auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::S3QueuePullMicroseconds); - - Chunk chunk; - if (reader->pull(chunk)) - { - LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), path); - - file_status->processed_rows += chunk.getNumRows(); - processed_rows_from_file += chunk.getNumRows(); - - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, requested_virtual_columns, path, reader.getObjectInfo().metadata->size_bytes); - return chunk; - } - } - catch (...) - { - const auto message = getCurrentExceptionMessage(true); - LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", path, message); - - file_metadata->setFailed(message); - - appendLogElement(path, *file_status, processed_rows_from_file, false); - throw; - } - - file_metadata->setProcessed(); - applyActionAfterProcessing(reader.getObjectInfo().relative_path); - - appendLogElement(path, *file_status, processed_rows_from_file, true); - file_status.reset(); - processed_rows_from_file = 0; - - if (shutdown_called) - { - LOG_INFO(log, "Shutdown was called, stopping sync"); - break; - } - - chassert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - file_status = files_metadata->getFileStatus(reader.getObjectInfo().getPath()); - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. 
- internal_source->create_reader_pool->wait(); - reader_future = internal_source->createReaderAsync(processor_id); - } - - return {}; -} - -void StorageS3QueueSource::applyActionAfterProcessing(const String & path) -{ - switch (action) - { - case S3QueueAction::DELETE: - { - assert(remove_file_func); - remove_file_func(path); - break; - } - case S3QueueAction::KEEP: - break; - } -} - -void StorageS3QueueSource::appendLogElement( - const std::string & filename, - S3QueueMetadata::FileStatus & file_status_, - size_t processed_rows, - bool processed) -{ - if (!s3_queue_log) - return; - - S3QueueLogElement elem{}; - { - elem = S3QueueLogElement - { - .event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), - .database = storage_id.database_name, - .table = storage_id.table_name, - .uuid = toString(storage_id.uuid), - .file_name = filename, - .rows_processed = processed_rows, - .status = processed ? S3QueueLogElement::S3QueueStatus::Processed : S3QueueLogElement::S3QueueStatus::Failed, - .counters_snapshot = file_status_.profile_counters.getPartiallyAtomicSnapshot(), - .processing_start_time = file_status_.processing_start_time, - .processing_end_time = file_status_.processing_end_time, - .exception = file_status_.getException(), - }; - } - s3_queue_log->add(std::move(elem)); -} - -} diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h deleted file mode 100644 index 6e098f8cb63..00000000000 --- a/src/Storages/S3Queue/S3QueueSource.h +++ /dev/null @@ -1,132 +0,0 @@ -#pragma once -#include "config.h" - -#include -#include -#include -#include -#include -#include - - -namespace Poco { class Logger; } - -namespace DB -{ - -struct ObjectMetadata; - -class StorageS3QueueSource : public ISource, WithContext -{ -public: - using Storage = StorageObjectStorage; - using ConfigurationPtr = Storage::ConfigurationPtr; - using GlobIterator = StorageObjectStorageSource::GlobIterator; - using ZooKeeperGetter = 
std::function; - using RemoveFileFunc = std::function; - using FileStatusPtr = S3QueueMetadata::FileStatusPtr; - using ReaderHolder = StorageObjectStorageSource::ReaderHolder; - using Metadata = S3QueueMetadata; - using ObjectInfo = StorageObjectStorageSource::ObjectInfo; - using ObjectInfoPtr = std::shared_ptr; - using ObjectInfos = std::vector; - - struct S3QueueObjectInfo : public ObjectInfo - { - S3QueueObjectInfo( - const ObjectInfo & object_info, - Metadata::FileMetadataPtr processing_holder_); - - Metadata::FileMetadataPtr processing_holder; - }; - - class FileIterator : public StorageObjectStorageSource::IIterator - { - public: - FileIterator( - std::shared_ptr metadata_, - std::unique_ptr glob_iterator_, - std::atomic & shutdown_called_, - LoggerPtr logger_); - - /// Note: - /// List results in s3 are always returned in UTF-8 binary order. - /// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) - ObjectInfoPtr nextImpl(size_t processor) override; - - size_t estimatedKeysCount() override; - - private: - using Bucket = S3QueueMetadata::Bucket; - using Processor = S3QueueMetadata::Processor; - - const std::shared_ptr metadata; - const std::unique_ptr glob_iterator; - - std::atomic & shutdown_called; - std::mutex mutex; - LoggerPtr log; - - std::mutex buckets_mutex; - struct ListedKeys - { - std::deque keys; - std::optional processor; - }; - std::unordered_map listed_keys_cache; - bool iterator_finished = false; - std::unordered_map bucket_holders; - - std::pair getNextKeyFromAcquiredBucket(size_t processor); - }; - - StorageS3QueueSource( - String name_, - size_t processor_id_, - const Block & header_, - std::unique_ptr internal_source_, - std::shared_ptr files_metadata_, - const S3QueueAction & action_, - RemoveFileFunc remove_file_func_, - const NamesAndTypesList & requested_virtual_columns_, - ContextPtr context_, - const std::atomic & shutdown_called_, - const std::atomic & table_is_being_dropped_, - std::shared_ptr 
s3_queue_log_, - const StorageID & storage_id_, - LoggerPtr log_); - - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - - String getName() const override; - - Chunk generate() override; - -private: - const String name; - const size_t processor_id; - const S3QueueAction action; - const std::shared_ptr files_metadata; - const std::shared_ptr internal_source; - const NamesAndTypesList requested_virtual_columns; - const std::atomic & shutdown_called; - const std::atomic & table_is_being_dropped; - const std::shared_ptr s3_queue_log; - const StorageID storage_id; - - RemoveFileFunc remove_file_func; - LoggerPtr log; - - ReaderHolder reader; - std::future reader_future; - std::atomic initialized{false}; - size_t processed_rows_from_file = 0; - - S3QueueOrderedFileMetadata::BucketHolderPtr current_bucket_holder; - - void applyActionAfterProcessing(const String & path); - void appendLogElement(const std::string & filename, S3QueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); - void lazyInitialize(size_t processor); -}; - -} diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 11e2a2fc5e7..bdf69b9be15 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -140,6 +140,9 @@ class IMergeTreeDataPart; using ManyExpressionActions = std::vector; +struct StorageSnapshot; +using StorageSnapshotPtr = std::shared_ptr; + /** Query along with some additional data, * that can be used during query processing * inside storage engines. @@ -173,6 +176,13 @@ struct SelectQueryInfo /// Local storage limits StorageLimits local_storage_limits; + /// This is a leak of abstraction. + /// StorageMerge replaces storage into query_tree. However, column types may be changed for inner table. + /// So, resolved query tree might have incompatible types. + /// StorageDistributed uses this query tree to calculate a header, throws if we use storage snapshot. 
+ /// To avoid this, we use initial merge_storage_snapshot. + StorageSnapshotPtr merge_storage_snapshot; + /// Cluster for the query. ClusterPtr cluster; /// Optimized cluster for the query. @@ -208,19 +218,9 @@ struct SelectQueryInfo bool need_aggregate = false; PrewhereInfoPtr prewhere_info; - /// Generated by pre-run optimization with StorageDummy. - /// Currently it's used to support StorageMerge PREWHERE optimization. - PrewhereInfoPtr optimized_prewhere_info; - /// If query has aggregate functions bool has_aggregates = false; - /// If query has any filter and no arrayJoin before filter. Used by skipping FINAL - /// Skipping FINAL algorithm will output the original chunk and a column indices of - /// selected rows. If query has filter and doesn't have array join before any filter, - /// we can merge the indices with the first filter in FilterTransform later. - bool has_filters_and_no_array_join_before_filter = false; - ClusterPtr getCluster() const { return !optimized_cluster ? cluster : optimized_cluster; } bool settings_limit_offset_done = false; @@ -229,8 +229,8 @@ struct SelectQueryInfo bool is_parameterized_view = false; bool optimize_trivial_count = false; - // If limit is not 0, that means it's a trivial limit query. - UInt64 limit = 0; + // If not 0, that means it's a trivial limit query. 
+ UInt64 trivial_limit = 0; /// For IStorageSystemOneBlock std::vector columns_mask; diff --git a/src/Storages/Statistics/ConditionSelectivityEstimator.cpp b/src/Storages/Statistics/ConditionSelectivityEstimator.cpp new file mode 100644 index 00000000000..757136fdf42 --- /dev/null +++ b/src/Storages/Statistics/ConditionSelectivityEstimator.cpp @@ -0,0 +1,201 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +void ConditionSelectivityEstimator::ColumnSelectivityEstimator::merge(String part_name, ColumnStatisticsPtr stats) +{ + if (part_statistics.contains(part_name)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "part {} has been added in column {}", part_name, stats->columnName()); + part_statistics[part_name] = stats; +} + +Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(Float64 val, Float64 rows) const +{ + if (part_statistics.empty()) + return default_normal_cond_factor * rows; + Float64 result = 0; + Float64 part_rows = 0; + for (const auto & [key, estimator] : part_statistics) + { + result += estimator->estimateLess(val); + part_rows += estimator->rowCount(); + } + return result * rows / part_rows; +} + +Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(Float64 val, Float64 rows) const +{ + return rows - estimateLess(val, rows); +} + +Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(Float64 val, Float64 rows) const +{ + if (part_statistics.empty()) + { + if (val < - threshold || val > threshold) + return default_normal_cond_factor * rows; + else + return default_good_cond_factor * rows; + } + Float64 result = 0; + Float64 partial_cnt = 0; + for (const auto & [key, estimator] : part_statistics) + { + result += estimator->estimateEqual(val); + partial_cnt += estimator->rowCount(); + } + return result * rows / partial_cnt; +} + +/// second return value represents how many columns in the node. 
+static std::pair tryToExtractSingleColumn(const RPNBuilderTreeNode & node) +{ + if (node.isConstant()) + { + return {}; + } + + if (!node.isFunction()) + { + auto column_name = node.getColumnName(); + return {column_name, 1}; + } + + auto function_node = node.toFunctionNode(); + size_t arguments_size = function_node.getArgumentsSize(); + std::pair result; + for (size_t i = 0; i < arguments_size; ++i) + { + auto function_argument = function_node.getArgumentAt(i); + auto subresult = tryToExtractSingleColumn(function_argument); + if (subresult.second == 0) /// the subnode contains 0 column + continue; + else if (subresult.second > 1) /// the subnode contains more than 1 column + return subresult; + else if (result.second == 0 || result.first == subresult.first) /// subnodes contain same column. + result = subresult; + else + return {"", 2}; + } + return result; +} + +std::pair ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const +{ + if (!node.isFunction()) + return {}; + + auto function_node = node.toFunctionNode(); + if (function_node.getArgumentsSize() != 2) + return {}; + + String function_name = function_node.getFunctionName(); + + auto lhs_argument = function_node.getArgumentAt(0); + auto rhs_argument = function_node.getArgumentAt(1); + + auto lhs_argument_column_name = lhs_argument.getColumnName(); + auto rhs_argument_column_name = rhs_argument.getColumnName(); + + bool lhs_argument_is_column = column_name == (lhs_argument_column_name); + bool rhs_argument_is_column = column_name == (rhs_argument_column_name); + + bool lhs_argument_is_constant = lhs_argument.isConstant(); + bool rhs_argument_is_constant = rhs_argument.isConstant(); + + RPNBuilderTreeNode * constant_node = nullptr; + + if (lhs_argument_is_column && rhs_argument_is_constant) + constant_node = &rhs_argument; + else if (lhs_argument_is_constant && rhs_argument_is_column) + constant_node = &lhs_argument; + else + return {}; + + Field 
output_value; + DataTypePtr output_type; + if (!constant_node->tryGetConstant(output_value, output_type)) + return {}; + + const auto type = output_value.getType(); + Float64 value; + if (type == Field::Types::Int64) + value = output_value.get(); + else if (type == Field::Types::UInt64) + value = output_value.get(); + else if (type == Field::Types::Float64) + value = output_value.get(); + else + return {}; + return std::make_pair(function_name, value); +} + +Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode & node) const +{ + auto result = tryToExtractSingleColumn(node); + if (result.second != 1) + { + return default_unknown_cond_factor; + } + String col = result.first; + auto it = column_estimators.find(col); + + /// If there the estimator of the column is not found or there are no data at all, + /// we use dummy estimation. + bool dummy = total_rows == 0; + ColumnSelectivityEstimator estimator; + if (it != column_estimators.end()) + { + estimator = it->second; + } + else + { + dummy = true; + } + auto [op, val] = extractBinaryOp(node, col); + if (op == "equals") + { + if (dummy) + { + if (val < - threshold || val > threshold) + return default_normal_cond_factor * total_rows; + else + return default_good_cond_factor * total_rows; + } + return estimator.estimateEqual(val, total_rows); + } + else if (op == "less" || op == "lessOrEquals") + { + if (dummy) + return default_normal_cond_factor * total_rows; + return estimator.estimateLess(val, total_rows); + } + else if (op == "greater" || op == "greaterOrEquals") + { + if (dummy) + return default_normal_cond_factor * total_rows; + return estimator.estimateGreater(val, total_rows); + } + else + return default_unknown_cond_factor * total_rows; +} + +void ConditionSelectivityEstimator::merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat) +{ + if (!part_names.contains(part_name)) + { + total_rows += part_rows; + part_names.insert(part_name); + } + if (column_stat != 
nullptr) + column_estimators[column_stat->columnName()].merge(part_name, column_stat); +} + +} diff --git a/src/Storages/Statistics/ConditionSelectivityEstimator.h b/src/Storages/Statistics/ConditionSelectivityEstimator.h new file mode 100644 index 00000000000..f0599742276 --- /dev/null +++ b/src/Storages/Statistics/ConditionSelectivityEstimator.h @@ -0,0 +1,50 @@ +#pragma once + +#include + +namespace DB +{ + +class RPNBuilderTreeNode; + +/// It estimates the selectivity of a condition. +class ConditionSelectivityEstimator +{ +private: + friend class ColumnStatistics; + struct ColumnSelectivityEstimator + { + /// We store the part_name and part_statistics. + /// then simply get selectivity for every part_statistics and combine them. + std::map part_statistics; + + void merge(String part_name, ColumnStatisticsPtr stats); + + Float64 estimateLess(Float64 val, Float64 rows) const; + + Float64 estimateGreater(Float64 val, Float64 rows) const; + + Float64 estimateEqual(Float64 val, Float64 rows) const; + }; + + static constexpr auto default_good_cond_factor = 0.1; + static constexpr auto default_normal_cond_factor = 0.5; + static constexpr auto default_unknown_cond_factor = 1.0; + /// Conditions like "x = N" are considered good if abs(N) > threshold. + /// This is used to assume that condition is likely to have good selectivity. + static constexpr auto threshold = 2; + + UInt64 total_rows = 0; + std::set part_names; + std::map column_estimators; + std::pair extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const; + +public: + /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ... 
+ /// Right now we only support simple condition like col = val / col < val + Float64 estimateRowCount(const RPNBuilderTreeNode & node) const; + + void merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat); +}; + +} diff --git a/src/Storages/Statistics/Estimator.cpp b/src/Storages/Statistics/Estimator.cpp deleted file mode 100644 index e272014c1c2..00000000000 --- a/src/Storages/Statistics/Estimator.cpp +++ /dev/null @@ -1,137 +0,0 @@ -#include -#include - -namespace DB -{ - -/// second return value represents how many columns in the node. -static std::pair tryToExtractSingleColumn(const RPNBuilderTreeNode & node) -{ - if (node.isConstant()) - { - return {}; - } - - if (!node.isFunction()) - { - auto column_name = node.getColumnName(); - return {column_name, 1}; - } - - auto function_node = node.toFunctionNode(); - size_t arguments_size = function_node.getArgumentsSize(); - std::pair result; - for (size_t i = 0; i < arguments_size; ++i) - { - auto function_argument = function_node.getArgumentAt(i); - auto subresult = tryToExtractSingleColumn(function_argument); - if (subresult.second == 0) /// the subnode contains 0 column - continue; - else if (subresult.second > 1) /// the subnode contains more than 1 column - return subresult; - else if (result.second == 0 || result.first == subresult.first) /// subnodes contain same column. 
- result = subresult; - else - return {"", 2}; - } - return result; -} - -std::pair ConditionEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const std::string & column_name) const -{ - if (!node.isFunction()) - return {}; - - auto function_node = node.toFunctionNode(); - if (function_node.getArgumentsSize() != 2) - return {}; - - std::string function_name = function_node.getFunctionName(); - - auto lhs_argument = function_node.getArgumentAt(0); - auto rhs_argument = function_node.getArgumentAt(1); - - auto lhs_argument_column_name = lhs_argument.getColumnName(); - auto rhs_argument_column_name = rhs_argument.getColumnName(); - - bool lhs_argument_is_column = column_name == (lhs_argument_column_name); - bool rhs_argument_is_column = column_name == (rhs_argument_column_name); - - bool lhs_argument_is_constant = lhs_argument.isConstant(); - bool rhs_argument_is_constant = rhs_argument.isConstant(); - - RPNBuilderTreeNode * constant_node = nullptr; - - if (lhs_argument_is_column && rhs_argument_is_constant) - constant_node = &rhs_argument; - else if (lhs_argument_is_constant && rhs_argument_is_column) - constant_node = &lhs_argument; - else - return {}; - - Field output_value; - DataTypePtr output_type; - if (!constant_node->tryGetConstant(output_value, output_type)) - return {}; - - const auto type = output_value.getType(); - Float64 value; - if (type == Field::Types::Int64) - value = output_value.get(); - else if (type == Field::Types::UInt64) - value = output_value.get(); - else if (type == Field::Types::Float64) - value = output_value.get(); - else - return {}; - return std::make_pair(function_name, value); -} - -Float64 ConditionEstimator::estimateSelectivity(const RPNBuilderTreeNode & node) const -{ - auto result = tryToExtractSingleColumn(node); - if (result.second != 1) - { - return default_unknown_cond_factor; - } - String col = result.first; - auto it = column_estimators.find(col); - - /// If there the estimator of the column is not found or there 
are no data at all, - /// we use dummy estimation. - bool dummy = total_count == 0; - ColumnEstimator estimator; - if (it != column_estimators.end()) - { - estimator = it->second; - } - else - { - dummy = true; - } - auto [op, val] = extractBinaryOp(node, col); - if (op == "equals") - { - if (val < -threshold || val > threshold) - return default_normal_cond_factor; - else - return default_good_cond_factor; - } - else if (op == "less" || op == "lessThan") - { - if (dummy) - return default_normal_cond_factor; - return estimator.estimateLess(val) / total_count; - } - else if (op == "greater" || op == "greaterThan") - { - if (dummy) - return default_normal_cond_factor; - return estimator.estimateGreater(val) / total_count; - } - else - return default_unknown_cond_factor; -} - - -} diff --git a/src/Storages/Statistics/Estimator.h b/src/Storages/Statistics/Estimator.h deleted file mode 100644 index 903bb57eb80..00000000000 --- a/src/Storages/Statistics/Estimator.h +++ /dev/null @@ -1,111 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -class RPNBuilderTreeNode; - -/// It estimates the selectivity of a condition. -class ConditionEstimator -{ -private: - - static constexpr auto default_good_cond_factor = 0.1; - static constexpr auto default_normal_cond_factor = 0.5; - static constexpr auto default_unknown_cond_factor = 1.0; - /// Conditions like "x = N" are considered good if abs(N) > threshold. - /// This is used to assume that condition is likely to have good selectivity. - static constexpr auto threshold = 2; - - UInt64 total_count = 0; - - /// Minimum estimator for values in a part. It can contains multiple types of statistics. 
- /// But right now we only have tdigest; - struct PartColumnEstimator - { - UInt64 part_count = 0; - - std::shared_ptr tdigest; - - void merge(StatisticPtr statistic) - { - UInt64 cur_part_count = statistic->count(); - if (part_count == 0) - part_count = cur_part_count; - - if (typeid_cast(statistic.get())) - { - tdigest = std::static_pointer_cast(statistic); - } - } - - Float64 estimateLess(Float64 val) const - { - if (tdigest != nullptr) - return tdigest -> estimateLess(val); - return part_count * default_normal_cond_factor; - } - - Float64 estimateGreator(Float64 val) const - { - if (tdigest != nullptr) - return part_count - tdigest -> estimateLess(val); - return part_count * default_normal_cond_factor; - } - }; - - /// An estimator for a column consists of several PartColumnEstimator. - /// We simply get selectivity for every part estimator and combine the result. - struct ColumnEstimator - { - std::map estimators; - - void merge(std::string part_name, StatisticPtr statistic) - { - estimators[part_name].merge(statistic); - } - - Float64 estimateLess(Float64 val) const - { - if (estimators.empty()) - return default_normal_cond_factor; - Float64 result = 0; - for (const auto & [key, estimator] : estimators) - result += estimator.estimateLess(val); - return result; - } - - Float64 estimateGreater(Float64 val) const - { - if (estimators.empty()) - return default_normal_cond_factor; - Float64 result = 0; - for (const auto & [key, estimator] : estimators) - result += estimator.estimateGreator(val); - return result; - } - }; - - std::map column_estimators; - /// std::optional extractSingleColumn(const RPNBuilderTreeNode & node) const; - std::pair extractBinaryOp(const RPNBuilderTreeNode & node, const std::string & column_name) const; - -public: - ConditionEstimator() = default; - - /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ... 
- /// Right now we only support simple condition like col = val / col < val - Float64 estimateSelectivity(const RPNBuilderTreeNode & node) const; - - void merge(std::string part_name, UInt64 part_count, StatisticPtr statistic) - { - total_count += part_count; - if (statistic != nullptr) - column_estimators[statistic->columnName()].merge(part_name, statistic); - } -}; - - -} diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index 6619eac19dc..fed0bd61c03 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -1,11 +1,14 @@ #include #include -#include #include -#include +#include +#include +#include #include #include +#include +#include #include namespace DB @@ -15,39 +18,133 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int INCORRECT_QUERY; - extern const int ILLEGAL_STATISTIC; } -void MergeTreeStatisticsFactory::registerCreator(StatisticType stat_type, Creator creator) +/// Version / bitmask of statistics / data of statistics / +enum StatisticsFileVersion : UInt16 { - if (!creators.emplace(stat_type, std::move(creator)).second) - throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticsFactory: the statistic creator type {} is not unique", stat_type); -} + V0 = 0, +}; -void MergeTreeStatisticsFactory::registerValidator(StatisticType stat_type, Validator validator) +IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) {} + +ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_) + : stats_desc(stats_desc_), rows(0) { - if (!validators.emplace(stat_type, std::move(validator)).second) - throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticsFactory: the statistic validator type {} is not unique", stat_type); - } -StatisticPtr TDigestCreator(const StatisticDescription & stat) +void ColumnStatistics::update(const ColumnPtr & column) { - return StatisticPtr(new TDigestStatistic(stat)); + 
rows += column->size(); + for (const auto & iter : stats) + { + iter.second->update(column); + } } -void TDigestValidator(const StatisticDescription &, DataTypePtr data_type) +Float64 ColumnStatistics::estimateLess(Float64 val) const { - data_type = removeNullable(data_type); - if (!data_type->isValueRepresentedByNumber()) - throw Exception(ErrorCodes::ILLEGAL_STATISTIC, "TDigest does not support type {}", data_type->getName()); + if (stats.contains(StatisticsType::TDigest)) + return std::static_pointer_cast(stats.at(StatisticsType::TDigest))->estimateLess(val); + return rows * ConditionSelectivityEstimator::default_normal_cond_factor; } +Float64 ColumnStatistics::estimateGreater(Float64 val) const +{ + return rows - estimateLess(val); +} + +Float64 ColumnStatistics::estimateEqual(Float64 val) const +{ + if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) + { + auto uniq_static = std::static_pointer_cast(stats.at(StatisticsType::Uniq)); + /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) + /// for every bucket. 
+ if (uniq_static->getCardinality() < 2048) + { + auto tdigest_static = std::static_pointer_cast(stats.at(StatisticsType::TDigest)); + return tdigest_static->estimateEqual(val); + } + } + if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold) + return rows * ConditionSelectivityEstimator::default_normal_cond_factor; + else + return rows * ConditionSelectivityEstimator::default_good_cond_factor; +} + +void ColumnStatistics::serialize(WriteBuffer & buf) +{ + writeIntBinary(V0, buf); + UInt64 stat_types_mask = 0; + for (const auto & [type, _]: stats) + stat_types_mask |= 1 << UInt8(type); + writeIntBinary(stat_types_mask, buf); + /// We write some basic statistics + writeIntBinary(rows, buf); + /// We write complex statistics + for (const auto & [type, stat_ptr]: stats) + stat_ptr->serialize(buf); +} + +void ColumnStatistics::deserialize(ReadBuffer &buf) +{ + UInt16 version; + readIntBinary(version, buf); + if (version != V0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown file format version: {}", version); + + UInt64 stat_types_mask = 0; + readIntBinary(stat_types_mask, buf); + readIntBinary(rows, buf); + for (auto it = stats.begin(); it != stats.end();) + { + if (!(stat_types_mask & 1 << UInt8(it->first))) + { + stats.erase(it++); + } + else + { + it->second->deserialize(buf); + ++it; + } + } +} + +String ColumnStatistics::getFileName() const +{ + return STATS_FILE_PREFIX + columnName(); +} + +const String & ColumnStatistics::columnName() const +{ + return stats_desc.column_name; +} + +UInt64 ColumnStatistics::rowCount() const +{ + return rows; +} + +void MergeTreeStatisticsFactory::registerCreator(StatisticsType stats_type, Creator creator) +{ + if (!creators.emplace(stats_type, std::move(creator)).second) + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticsFactory: the statistics creator type {} is not unique", stats_type); +} + +void MergeTreeStatisticsFactory::registerValidator(StatisticsType 
stats_type, Validator validator) +{ + if (!validators.emplace(stats_type, std::move(validator)).second) + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeStatisticsFactory: the statistics validator type {} is not unique", stats_type); + +} MergeTreeStatisticsFactory::MergeTreeStatisticsFactory() { - registerCreator(TDigest, TDigestCreator); - registerValidator(TDigest, TDigestValidator); + registerCreator(StatisticsType::TDigest, TDigestCreator); + registerCreator(StatisticsType::Uniq, UniqCreator); + registerValidator(StatisticsType::TDigest, TDigestValidator); + registerValidator(StatisticsType::Uniq, UniqValidator); } MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance() @@ -56,33 +153,42 @@ MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance() return instance; } -void MergeTreeStatisticsFactory::validate(const StatisticDescription & stat, DataTypePtr data_type) const +void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & stats, DataTypePtr data_type) const { - auto it = validators.find(stat.type); - if (it == validators.end()) + for (const auto & [type, desc] : stats.types_to_desc) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown Statistic type '{}'", stat.type); + auto it = validators.find(type); + if (it == validators.end()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown Statistic type '{}'", type); + } + it->second(desc, data_type); } - it->second(stat, data_type); } -StatisticPtr MergeTreeStatisticsFactory::get(const StatisticDescription & stat) const +ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescription & stats) const { - auto it = creators.find(stat.type); - if (it == creators.end()) + ColumnStatisticsPtr column_stat = std::make_shared(stats); + for (const auto & [type, desc] : stats.types_to_desc) { - throw Exception(ErrorCodes::INCORRECT_QUERY, - "Unknown Statistic type '{}'. 
Available types: tdigest", stat.type); + auto it = creators.find(type); + if (it == creators.end()) + { + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Unknown Statistic type '{}'. Available types: tdigest, uniq", type); + } + auto stat_ptr = (it->second)(desc, stats.data_type); + column_stat->stats[type] = stat_ptr; } - return std::make_shared(stat); + return column_stat; } -Statistics MergeTreeStatisticsFactory::getMany(const ColumnsDescription & columns) const +ColumnsStatistics MergeTreeStatisticsFactory::getMany(const ColumnsDescription & columns) const { - Statistics result; + ColumnsStatistics result; for (const auto & col : columns) - if (col.stat) - result.push_back(get(*col.stat)); + if (!col.statistics.empty()) + result.push_back(get(col.statistics)); return result; } diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index e6d9666ce1c..2ab1337af02 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -1,12 +1,8 @@ #pragma once -#include #include -#include - #include -#include #include #include #include @@ -14,38 +10,23 @@ #include -/// this is for user-defined statistic. -constexpr auto STAT_FILE_PREFIX = "statistic_"; -constexpr auto STAT_FILE_SUFFIX = ".stat"; - namespace DB { -class IStatistic; -using StatisticPtr = std::shared_ptr; -using Statistics = std::vector; +/// this is for user-defined statistic. +constexpr auto STATS_FILE_PREFIX = "statistics_"; +constexpr auto STATS_FILE_SUFFIX = ".stats"; -/// Statistic contains the distribution of values in a column. -/// right now we support -/// - tdigest -class IStatistic +/// Statistics describe properties of the values in the column, +/// e.g. how many unique values exist, +/// what are the N most frequent values, +/// how frequent is a value V, etc. 
+class IStatistics { public: - explicit IStatistic(const StatisticDescription & stat_) - : stat(stat_) - { - } - virtual ~IStatistic() = default; + explicit IStatistics(const SingleStatisticsDescription & stat_); - String getFileName() const - { - return STAT_FILE_PREFIX + columnName(); - } - - const String & columnName() const - { - return stat.column_name; - } + virtual ~IStatistics() = default; virtual void serialize(WriteBuffer & buf) = 0; @@ -53,40 +34,68 @@ public: virtual void update(const ColumnPtr & column) = 0; - virtual UInt64 count() = 0; - protected: + SingleStatisticsDescription stat; +}; - StatisticDescription stat; +using StatisticsPtr = std::shared_ptr; +class ColumnStatistics +{ +public: + explicit ColumnStatistics(const ColumnStatisticsDescription & stats_); + void serialize(WriteBuffer & buf); + void deserialize(ReadBuffer & buf); + String getFileName() const; + + const String & columnName() const; + + UInt64 rowCount() const; + + void update(const ColumnPtr & column); + + Float64 estimateLess(Float64 val) const; + + Float64 estimateGreater(Float64 val) const; + + Float64 estimateEqual(Float64 val) const; + +private: + + friend class MergeTreeStatisticsFactory; + ColumnStatisticsDescription stats_desc; + std::map stats; + UInt64 rows; /// the number of rows of the column }; class ColumnsDescription; +using ColumnStatisticsPtr = std::shared_ptr; +using ColumnsStatistics = std::vector; class MergeTreeStatisticsFactory : private boost::noncopyable { public: static MergeTreeStatisticsFactory & instance(); - void validate(const StatisticDescription & stat, DataTypePtr data_type) const; + void validate(const ColumnStatisticsDescription & stats, DataTypePtr data_type) const; - using Creator = std::function; + using Creator = std::function; - using Validator = std::function; + using Validator = std::function; - StatisticPtr get(const StatisticDescription & stat) const; + ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const; - 
Statistics getMany(const ColumnsDescription & columns) const; + ColumnsStatistics getMany(const ColumnsDescription & columns) const; - void registerCreator(StatisticType type, Creator creator); - void registerValidator(StatisticType type, Validator validator); + void registerCreator(StatisticsType type, Creator creator); + void registerValidator(StatisticsType type, Validator validator); protected: MergeTreeStatisticsFactory(); private: - using Creators = std::unordered_map; - using Validators = std::unordered_map; + using Creators = std::unordered_map; + using Validators = std::unordered_map; Creators creators; Validators validators; }; diff --git a/src/Storages/Statistics/TDigestStatistic.cpp b/src/Storages/Statistics/TDigestStatistic.cpp deleted file mode 100644 index efb4282d203..00000000000 --- a/src/Storages/Statistics/TDigestStatistic.cpp +++ /dev/null @@ -1,38 +0,0 @@ -#include - -namespace DB -{ - -Float64 TDigestStatistic::estimateLess(Float64 val) const -{ - return data.getCountLessThan(val); -} - -void TDigestStatistic::serialize(WriteBuffer & buf) -{ - data.serialize(buf); -} - -void TDigestStatistic::deserialize(ReadBuffer & buf) -{ - data.deserialize(buf); -} - -void TDigestStatistic::update(const ColumnPtr & column) -{ - size_t size = column->size(); - - for (size_t i = 0; i < size; ++i) - { - /// TODO: support more types. - Float64 value = column->getFloat64(i); - data.add(value, 1); - } -} - -UInt64 TDigestStatistic::count() -{ - return static_cast(data.count); -} - -} diff --git a/src/Storages/Statistics/TDigestStatistic.h b/src/Storages/Statistics/TDigestStatistic.h deleted file mode 100644 index 295b5f69900..00000000000 --- a/src/Storages/Statistics/TDigestStatistic.h +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -/// TDigestStatistic is a kind of histogram. 
-class TDigestStatistic : public IStatistic -{ - QuantileTDigest data; -public: - explicit TDigestStatistic(const StatisticDescription & stat_) : IStatistic(stat_) - { - } - - Float64 estimateLess(Float64 val) const; - - void serialize(WriteBuffer & buf) override; - - void deserialize(ReadBuffer & buf) override; - - void update(const ColumnPtr & column) override; - - UInt64 count() override; -}; - -} diff --git a/src/Storages/Statistics/TDigestStatistics.cpp b/src/Storages/Statistics/TDigestStatistics.cpp new file mode 100644 index 00000000000..aa5662c979d --- /dev/null +++ b/src/Storages/Statistics/TDigestStatistics.cpp @@ -0,0 +1,60 @@ +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_STATISTICS; +} + +TDigestStatistics::TDigestStatistics(const SingleStatisticsDescription & stat_): + IStatistics(stat_) +{ +} + +Float64 TDigestStatistics::estimateLess(Float64 val) const +{ + return data.getCountLessThan(val); +} + +Float64 TDigestStatistics::estimateEqual(Float64 val) const +{ + return data.getCountEqual(val); +} + +void TDigestStatistics::serialize(WriteBuffer & buf) +{ + data.serialize(buf); +} + +void TDigestStatistics::deserialize(ReadBuffer & buf) +{ + data.deserialize(buf); +} + +void TDigestStatistics::update(const ColumnPtr & column) +{ + size_t size = column->size(); + + for (size_t i = 0; i < size; ++i) + { + /// TODO: support more types. 
+ Float64 value = column->getFloat64(i); + data.add(value, 1); + } +} + +StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) +{ + return std::make_shared(stat); +} + +void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +{ + data_type = removeNullable(data_type); + if (!data_type->isValueRepresentedByNumber()) + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' does not support type {}", data_type->getName()); +} + +} diff --git a/src/Storages/Statistics/TDigestStatistics.h b/src/Storages/Statistics/TDigestStatistics.h new file mode 100644 index 00000000000..7c361b8751f --- /dev/null +++ b/src/Storages/Statistics/TDigestStatistics.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include + +namespace DB +{ + + +/// TDigestStatistic is a kind of histogram. +class TDigestStatistics : public IStatistics +{ +public: + explicit TDigestStatistics(const SingleStatisticsDescription & stat_); + + Float64 estimateLess(Float64 val) const; + + Float64 estimateEqual(Float64 val) const; + + void serialize(WriteBuffer & buf) override; + + void deserialize(ReadBuffer & buf) override; + + void update(const ColumnPtr & column) override; +private: + QuantileTDigest data; +}; + +StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); +void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); + +} diff --git a/src/Storages/Statistics/UniqStatistics.cpp b/src/Storages/Statistics/UniqStatistics.cpp new file mode 100644 index 00000000000..fc748e769ca --- /dev/null +++ b/src/Storages/Statistics/UniqStatistics.cpp @@ -0,0 +1,66 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_STATISTICS; +} + +UniqStatistics::UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type) + : IStatistics(stat_) +{ + arena = std::make_unique(); + AggregateFunctionProperties properties; + 
collector = AggregateFunctionFactory::instance().get("uniq", NullsAction::IGNORE_NULLS, {data_type}, Array(), properties); + data = arena->alignedAlloc(collector->sizeOfData(), collector->alignOfData()); + collector->create(data); +} + +UniqStatistics::~UniqStatistics() +{ + collector->destroy(data); +} + +UInt64 UniqStatistics::getCardinality() +{ + auto column = DataTypeUInt64().createColumn(); + collector->insertResultInto(data, *column, nullptr); + return column->getUInt(0); +} + +void UniqStatistics::serialize(WriteBuffer & buf) +{ + collector->serialize(data, buf); +} + +void UniqStatistics::deserialize(ReadBuffer & buf) +{ + collector->deserialize(data, buf); +} + +void UniqStatistics::update(const ColumnPtr & column) +{ + /// TODO(hanfei): For low cardinality, it's very slow to convert to full column. We can read the dictionary directly. + /// Here we intend to avoid crash in CI. + auto col_ptr = column->convertToFullColumnIfLowCardinality(); + const IColumn * raw_ptr = col_ptr.get(); + collector->addBatchSinglePlace(0, column->size(), data, &(raw_ptr), nullptr); +} + +void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +{ + data_type = removeNullable(data_type); + if (!data_type->isValueRepresentedByNumber()) + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' does not support type {}", data_type->getName()); +} + +StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +{ + return std::make_shared(stat, data_type); +} + +} diff --git a/src/Storages/Statistics/UniqStatistics.h b/src/Storages/Statistics/UniqStatistics.h new file mode 100644 index 00000000000..0d86a6e458a --- /dev/null +++ b/src/Storages/Statistics/UniqStatistics.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class UniqStatistics : public IStatistics +{ +public: + UniqStatistics(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); + + 
~UniqStatistics() override; + + UInt64 getCardinality(); + + void serialize(WriteBuffer & buf) override; + + void deserialize(ReadBuffer & buf) override; + + void update(const ColumnPtr & column) override; + +private: + + std::unique_ptr arena; + AggregateFunctionPtr collector; + AggregateDataPtr data; + +}; + +StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); +void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type); + +} diff --git a/src/Storages/Statistics/tests/gtest_stats.cpp b/src/Storages/Statistics/tests/gtest_stats.cpp index 45f8271be97..f94f310be56 100644 --- a/src/Storages/Statistics/tests/gtest_stats.cpp +++ b/src/Storages/Statistics/tests/gtest_stats.cpp @@ -1,6 +1,6 @@ #include -#include +#include TEST(Statistics, TDigestLessThan) { diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 7d4226f2fbe..f10fb78f933 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -1,14 +1,16 @@ +#include + #include #include #include #include -#include +#include #include #include +#include #include #include #include -#include #include #include @@ -19,93 +21,187 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_QUERY; + extern const int ILLEGAL_STATISTICS; extern const int LOGICAL_ERROR; }; -StatisticDescription & StatisticDescription::operator=(const StatisticDescription & other) +SingleStatisticsDescription & SingleStatisticsDescription::operator=(const SingleStatisticsDescription & other) { if (this == &other) return *this; type = other.type; - column_name = other.column_name; ast = other.ast ? 
other.ast->clone() : nullptr; return *this; } -StatisticDescription & StatisticDescription::operator=(StatisticDescription && other) noexcept +SingleStatisticsDescription & SingleStatisticsDescription::operator=(SingleStatisticsDescription && other) noexcept { if (this == &other) return *this; - type = std::exchange(other.type, StatisticType{}); - column_name = std::move(other.column_name); + type = std::exchange(other.type, StatisticsType{}); ast = other.ast ? other.ast->clone() : nullptr; other.ast.reset(); return *this; } -StatisticType stringToType(String type) +static StatisticsType stringToStatisticsType(String type) { if (type == "tdigest") - return TDigest; - throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type: {}. We only support statistic type `tdigest` right now.", type); + return StatisticsType::TDigest; + if (type == "uniq") + return StatisticsType::Uniq; + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are `tdigest` and `uniq`.", type); } -String StatisticDescription::getTypeName() const +String SingleStatisticsDescription::getTypeName() const { - if (type == TDigest) - return "tdigest"; - throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type: {}. 
We only support statistic type `tdigest` right now.", type); -} - -std::vector StatisticDescription::getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns) -{ - const auto * stat_definition = definition_ast->as(); - if (!stat_definition) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create statistic from non ASTStatisticDeclaration AST"); - - std::vector stats; - stats.reserve(stat_definition->columns->children.size()); - for (const auto & column_ast : stat_definition->columns->children) + switch (type) { - StatisticDescription stat; - stat.type = stringToType(Poco::toLower(stat_definition->type)); - String column_name = column_ast->as().name(); + case StatisticsType::TDigest: + return "TDigest"; + case StatisticsType::Uniq: + return "Uniq"; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are `tdigest` and `uniq`.", type); + } +} - if (!columns.hasPhysical(column_name)) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", column_name); +SingleStatisticsDescription::SingleStatisticsDescription(StatisticsType type_, ASTPtr ast_) + : type(type_), ast(ast_) +{} - const auto & column = columns.getPhysical(column_name); - stat.column_name = column.name; - stat.ast = makeASTFunction("STATISTIC", std::make_shared(stat_definition->type)); - stats.push_back(stat); +bool SingleStatisticsDescription::operator==(const SingleStatisticsDescription & other) const +{ + return type == other.type; +} + +bool ColumnStatisticsDescription::operator==(const ColumnStatisticsDescription & other) const +{ + return types_to_desc == other.types_to_desc; +} + +bool ColumnStatisticsDescription::empty() const +{ + return types_to_desc.empty(); +} + +bool ColumnStatisticsDescription::contains(const String & stat_type) const +{ + return types_to_desc.contains(stringToStatisticsType(stat_type)); +} + +void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription 
& other, const String & merging_column_name, DataTypePtr merging_column_type, bool if_not_exists) +{ + chassert(merging_column_type); + + if (column_name.empty()) + { + column_name = merging_column_name; + data_type = merging_column_type; } - if (stats.empty()) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Empty statistic column list"); + for (const auto & [stats_type, stats_desc]: other.types_to_desc) + { + if (!if_not_exists && types_to_desc.contains(stats_type)) + { + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, column_name); + } + else if (!types_to_desc.contains(stats_type)) + types_to_desc.emplace(stats_type, stats_desc); + } +} +void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & other) +{ + if (other.column_name != column_name) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", column_name, other.column_name); + + types_to_desc = other.types_to_desc; +} + +void ColumnStatisticsDescription::clear() +{ + types_to_desc.clear(); +} + +std::vector ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns) +{ + const auto * stat_definition_ast = definition_ast->as(); + if (!stat_definition_ast) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot cast AST to ASTSingleStatisticsDeclaration"); + + StatisticsTypeDescMap statistics_types; + for (const auto & stat_ast : stat_definition_ast->types->children) + { + String stat_type_name = stat_ast->as().name; + auto stat_type = stringToStatisticsType(Poco::toLower(stat_type_name)); + if (statistics_types.contains(stat_type)) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Statistics type {} was specified more than once", stat_type_name); + SingleStatisticsDescription stat(stat_type, stat_ast->clone()); + + statistics_types.emplace(stat.type, stat); + } + + std::vector result; + result.reserve(stat_definition_ast->columns->children.size()); + 
+ for (const auto & column_ast : stat_definition_ast->columns->children) + { + ColumnStatisticsDescription stats; + String physical_column_name = column_ast->as().name(); + + if (!columns.hasPhysical(physical_column_name)) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", physical_column_name); + + const auto & column = columns.getPhysical(physical_column_name); + stats.column_name = column.name; + stats.types_to_desc = statistics_types; + result.push_back(stats); + } + + if (result.empty()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Empty statistics column list is not allowed."); + + return result; +} + +ColumnStatisticsDescription ColumnStatisticsDescription::fromColumnDeclaration(const ASTColumnDeclaration & column, DataTypePtr data_type) +{ + const auto & stat_type_list_ast = column.statistics_desc->as().arguments; + if (stat_type_list_ast->children.empty()) + throw Exception(ErrorCodes::INCORRECT_QUERY, "We expect at least one statistics type for column {}", queryToString(column)); + ColumnStatisticsDescription stats; + stats.column_name = column.name; + for (const auto & ast : stat_type_list_ast->children) + { + const auto & stat_type = ast->as().name; + + SingleStatisticsDescription stat(stringToStatisticsType(Poco::toLower(stat_type)), ast->clone()); + if (stats.types_to_desc.contains(stat.type)) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", stats.column_name, stat_type); + stats.types_to_desc.emplace(stat.type, std::move(stat)); + } + stats.data_type = data_type; return stats; } -String queryToString(const IAST & query); - -StatisticDescription StatisticDescription::getStatisticFromColumnDeclaration(const ASTColumnDeclaration & column) +ASTPtr ColumnStatisticsDescription::getAST() const { - const auto & stat_type_list_ast = column.stat_type->as().arguments; - if (stat_type_list_ast->children.size() != 1) - throw Exception(ErrorCodes::INCORRECT_QUERY, "We expect only one 
statistic type for column {}", queryToString(column)); - - const auto & stat_type = stat_type_list_ast->children[0]->as().name; - - StatisticDescription stat; - stat.type = stringToType(Poco::toLower(stat_type)); - stat.column_name = column.name; - stat.ast = column.stat_type; - - return stat; + auto function_node = std::make_shared(); + function_node->name = "STATISTICS"; + function_node->arguments = std::make_shared(); + for (const auto & [type, desc] : types_to_desc) + { + if (desc.ast == nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown ast"); + function_node->arguments->children.push_back(desc.ast); + } + function_node->children.push_back(function_node->arguments); + return function_node; } } diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h index b571fa31e9d..4862fb79d45 100644 --- a/src/Storages/StatisticsDescription.h +++ b/src/Storages/StatisticsDescription.h @@ -1,45 +1,66 @@ #pragma once +#include #include #include + #include namespace DB { -enum StatisticType +enum class StatisticsType : UInt8 { TDigest = 0, + Uniq = 1, + + Max = 63, }; -class ColumnsDescription; - -struct StatisticDescription +struct SingleStatisticsDescription { - /// the type of statistic, right now it's only tdigest. 
- StatisticType type; - - /// Names of statistic columns - String column_name; + StatisticsType type; ASTPtr ast; String getTypeName() const; - StatisticDescription() = default; - StatisticDescription(const StatisticDescription & other) { *this = other; } - StatisticDescription & operator=(const StatisticDescription & other); - StatisticDescription(StatisticDescription && other) noexcept { *this = std::move(other); } - StatisticDescription & operator=(StatisticDescription && other) noexcept; + SingleStatisticsDescription() = delete; + SingleStatisticsDescription(StatisticsType type_, ASTPtr ast_); - bool operator==(const StatisticDescription & other) const - { - return type == other.type && column_name == other.column_name; - } + SingleStatisticsDescription(const SingleStatisticsDescription & other) { *this = other; } + SingleStatisticsDescription & operator=(const SingleStatisticsDescription & other); + SingleStatisticsDescription(SingleStatisticsDescription && other) noexcept { *this = std::move(other); } + SingleStatisticsDescription & operator=(SingleStatisticsDescription && other) noexcept; - static StatisticDescription getStatisticFromColumnDeclaration(const ASTColumnDeclaration & column); + bool operator==(const SingleStatisticsDescription & other) const; +}; - static std::vector getStatisticsFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns); +class ColumnsDescription; + +struct ColumnStatisticsDescription +{ + bool operator==(const ColumnStatisticsDescription & other) const; + + bool empty() const; + + bool contains(const String & stat_type) const; + + void merge(const ColumnStatisticsDescription & other, const String & column_name, DataTypePtr column_type, bool if_not_exists); + + void assign(const ColumnStatisticsDescription & other); + + void clear(); + + ASTPtr getAST() const; + + static std::vector fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns); + static ColumnStatisticsDescription 
fromColumnDeclaration(const ASTColumnDeclaration & column, DataTypePtr data_type); + + using StatisticsTypeDescMap = std::map; + StatisticsTypeDescMap types_to_desc; + String column_name; + DataTypePtr data_type; }; } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 9c58468c4a4..849fa5dbe0b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -846,7 +846,7 @@ void StorageDistributed::read( remote_storage_id = StorageID{remote_database, remote_table}; auto query_tree_distributed = buildQueryTreeDistributed(modified_query_info, - storage_snapshot, + query_info.merge_storage_snapshot ? query_info.merge_storage_snapshot : storage_snapshot, remote_storage_id, remote_table_function_ptr); header = InterpreterSelectQueryAnalyzer::getSampleBlock(query_tree_distributed, local_context, SelectQueryOptions(processed_stage).analyze()); @@ -904,11 +904,13 @@ void StorageDistributed::read( [my_custom_key_ast = std::move(custom_key_ast), column_description = this->getInMemoryMetadataPtr()->columns, custom_key_type = settings.parallel_replicas_custom_key_filter_type.value, + custom_key_range_lower = settings.parallel_replicas_custom_key_range_lower.value, + custom_key_range_upper = settings.parallel_replicas_custom_key_range_upper.value, context = local_context, replica_count = modified_query_info.getCluster()->getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr { return getCustomKeyFilterForParallelReplica( - replica_count, replica_num - 1, my_custom_key_ast, custom_key_type, column_description, context); + replica_count, replica_num - 1, my_custom_key_ast, {custom_key_type, custom_key_range_lower, custom_key_range_upper}, column_description, context); }; } } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 6744159d5dc..7f39ff615f0 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1341,6 +1341,7 @@ Chunk 
StorageFileSource::generate() chassert(file_enumerator); current_path = fmt::format("{}::{}", archive_reader->getPath(), *filename_override); current_file_size = file_enumerator->getFileInfo().uncompressed_size; + current_file_last_modified = file_enumerator->getFileInfo().last_modified; if (need_only_count && tryGetCountFromCache(current_archive_stat)) continue; @@ -1370,6 +1371,7 @@ Chunk StorageFileSource::generate() struct stat file_stat; file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName()); current_file_size = file_stat.st_size; + current_file_last_modified = Poco::Timestamp::fromEpochTime(file_stat.st_mtime); if (getContext()->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) continue; @@ -1436,8 +1438,15 @@ Chunk StorageFileSource::generate() progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); /// Enrich with virtual columns. - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, requested_virtual_columns, current_path, current_file_size, filename_override.has_value() ? &filename_override.value() : nullptr); + VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( + chunk, requested_virtual_columns, + { + .path = current_path, + .size = current_file_size, + .filename = (filename_override.has_value() ? &filename_override.value() : nullptr), + .last_modified = current_file_last_modified + }); + return chunk; } @@ -1780,7 +1789,8 @@ public: void onCancel() override { std::lock_guard cancel_lock(cancel_mutex); - finalize(); + cancelBuffers(); + releaseBuffers(); cancelled = true; } @@ -1794,18 +1804,18 @@ public: catch (...) 
{ /// An exception context is needed to proper delete write buffers without finalization - release(); + releaseBuffers(); } } void onFinish() override { std::lock_guard cancel_lock(cancel_mutex); - finalize(); + finalizeBuffers(); } private: - void finalize() + void finalizeBuffers() { if (!writer) return; @@ -1814,20 +1824,29 @@ private: { writer->finalize(); writer->flush(); - write_buf->finalize(); } catch (...) { /// Stop ParallelFormattingOutputFormat correctly. - release(); + releaseBuffers(); throw; } + + write_buf->finalize(); } - void release() + void releaseBuffers() { writer.reset(); - write_buf->finalize(); + write_buf.reset(); + } + + void cancelBuffers() + { + if (writer) + writer->cancel(); + if (write_buf) + write_buf->cancel(); } StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 37da59c3664..ac094aeb489 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -279,6 +279,7 @@ private: FilesIteratorPtr files_iterator; String current_path; std::optional current_file_size; + std::optional current_file_last_modified; struct stat current_archive_stat; std::optional filename_override; Block sample_block; diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 2190e012c5b..754bc096958 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -50,6 +50,12 @@ namespace ErrorCodes namespace { +struct GenerateRandomState +{ + std::atomic add_total_rows = 0; +}; +using GenerateRandomStatePtr = std::shared_ptr; + void fillBufferWithRandomData(char * __restrict data, size_t limit, size_t size_of_type, pcg64 & rng, [[maybe_unused]] bool flip_bytes = false) { size_t size = limit * size_of_type; @@ -532,10 +538,24 @@ ColumnPtr fillColumnWithRandomData( class GenerateSource : public ISource { public: - GenerateSource(UInt64 block_size_, UInt64 max_array_length_, UInt64 max_string_length_, UInt64 
random_seed_, Block block_header_, ContextPtr context_) + GenerateSource( + UInt64 block_size_, + UInt64 max_array_length_, + UInt64 max_string_length_, + UInt64 random_seed_, + Block block_header_, + ContextPtr context_, + GenerateRandomStatePtr state_) : ISource(Nested::flattenNested(prepareBlockToFill(block_header_))) - , block_size(block_size_), max_array_length(max_array_length_), max_string_length(max_string_length_) - , block_to_fill(std::move(block_header_)), rng(random_seed_), context(context_) {} + , block_size(block_size_) + , max_array_length(max_array_length_) + , max_string_length(max_string_length_) + , block_to_fill(std::move(block_header_)) + , rng(random_seed_) + , context(context_) + , shared_state(state_) + { + } String getName() const override { return "GenerateRandom"; } @@ -549,7 +569,15 @@ protected: columns.emplace_back(fillColumnWithRandomData(elem.type, block_size, max_array_length, max_string_length, rng, context)); columns = Nested::flattenNested(block_to_fill.cloneWithColumns(columns)).getColumns(); - return {std::move(columns), block_size}; + + UInt64 total_rows = shared_state->add_total_rows.fetch_and(0); + if (total_rows) + addTotalRowsApprox(total_rows); + + auto chunk = Chunk{std::move(columns), block_size}; + progress(chunk.getNumRows(), chunk.bytes()); + + return chunk; } private: @@ -561,6 +589,7 @@ private: pcg64 rng; ContextPtr context; + GenerateRandomStatePtr shared_state; static Block & prepareBlockToFill(Block & block) { @@ -648,9 +677,6 @@ Pipe StorageGenerateRandom::read( { storage_snapshot->check(column_names); - Pipes pipes; - pipes.reserve(num_streams); - const ColumnsDescription & our_columns = storage_snapshot->metadata->getColumns(); Block block_header; for (const auto & name : column_names) @@ -679,16 +705,24 @@ Pipe StorageGenerateRandom::read( } } + UInt64 query_limit = query_info.trivial_limit; + if (query_limit && num_streams * max_block_size > query_limit) + { + /// We want to avoid spawning more streams 
than necessary + num_streams = std::min(num_streams, static_cast(((query_limit + max_block_size - 1) / max_block_size))); + } + Pipes pipes; + pipes.reserve(num_streams); + /// Will create more seed values for each source from initial seed. pcg64 generate(random_seed); + auto shared_state = std::make_shared(query_info.trivial_limit); + for (UInt64 i = 0; i < num_streams; ++i) { - auto source = std::make_shared(max_block_size, max_array_length, max_string_length, generate(), block_header, context); - - if (i == 0 && query_info.limit) - source->addTotalRowsApprox(query_info.limit); - + auto source = std::make_shared( + max_block_size, max_array_length, max_string_length, generate(), block_header, context, shared_state); pipes.emplace_back(std::move(source)); } diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index d12e5b1a20b..47e41cccc96 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include @@ -395,11 +395,14 @@ void registerStorageJoin(StorageFactory & factory) else if (kind_str == "full") { if (strictness == JoinStrictness::Any) - strictness = JoinStrictness::RightAny; + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ANY FULL JOINs are not implemented"); kind = JoinKind::Full; } } + if ((strictness == JoinStrictness::Semi || strictness == JoinStrictness::Anti) && (kind != JoinKind::Left && kind != JoinKind::Right)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, " SEMI|ANTI JOIN should be LEFT or RIGHT"); + if (kind == JoinKind::Comma) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter of storage Join must be LEFT or INNER or RIGHT or FULL (without quotes)."); diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 08e0526550d..de0324d7998 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -322,6 +322,10 @@ public: /// Rollback partial writes. /// No more writing. 
+ for (auto & [_, stream] : streams) + { + stream.cancel(); + } streams.clear(); /// Truncate files to the older sizes. @@ -373,6 +377,12 @@ private: plain->next(); plain->finalize(); } + + void cancel() + { + compressed.cancel(); + plain->cancel(); + } }; using FileStreams = std::map; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 735f51e1f32..316f398b476 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -273,7 +273,7 @@ void StorageMaterializedView::read( * They may be added in case of distributed query with JOIN. * In that case underlying table returns joined columns as well. */ - converting_actions->projectInput(false); + converting_actions->removeUnusedActions(); auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), converting_actions); converting_step->setStepDescription("Convert target table structure to MaterializedView structure"); query_plan.addStep(std::move(converting_step)); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 4c678a1228b..2dbe82c92d8 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -34,9 +34,10 @@ #include #include #include -#include #include #include +#include +#include #include #include #include @@ -402,10 +403,14 @@ ReadFromMerge::ReadFromMerge( { } -void ReadFromMerge::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value) +void ReadFromMerge::addFilter(FilterDAGInfo filter) { - SourceStepWithFilter::updatePrewhereInfo(prewhere_info_value); - common_header = applyPrewhereActions(common_header, prewhere_info); + output_stream->header = FilterTransform::transformHeader( + output_stream->header, + filter.actions.get(), + filter.column_name, + filter.do_remove_column); + pushed_down_filters.push_back(std::move(filter)); } void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) @@ 
-435,21 +440,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu Names column_names_as_aliases; Aliases aliases; - Names real_column_names = column_names; - if (child_plan.row_policy_data_opt) - child_plan.row_policy_data_opt->extendNames(real_column_names); - - auto modified_query_info = getModifiedQueryInfo(modified_context, table, nested_storage_snaphsot, real_column_names, column_names_as_aliases, aliases); - - auto source_pipeline = createSources( - child_plan.plan, - nested_storage_snaphsot, - modified_query_info, - common_processed_stage, - common_header, - child_plan.table_aliases, - child_plan.row_policy_data_opt, - table); + auto source_pipeline = buildPipeline(child_plan, common_processed_stage); if (source_pipeline && source_pipeline->initialized()) { @@ -567,10 +558,8 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ if (sampling_requested && !storage->supportsSampling()) throw Exception(ErrorCodes::SAMPLING_NOT_SUPPORTED, "Illegal SAMPLE: table {} doesn't support sampling", storage->getStorageID().getNameForLogs()); - res.emplace_back(); - - auto & aliases = res.back().table_aliases; - auto & row_policy_data_opt = res.back().row_policy_data_opt; + Aliases aliases; + RowPolicyDataOpt row_policy_data_opt; auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); auto nested_storage_snaphsot = storage->getStorageSnapshot(storage_metadata_snapshot, modified_context); @@ -649,7 +638,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ } - res.back().plan = createPlanForTable( + auto child = createPlanForTable( nested_storage_snaphsot, modified_query_info, common_processed_stage, @@ -659,9 +648,32 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ row_policy_data_opt, modified_context, current_streams); - res.back().plan.addInterpreterContext(modified_context); - } + child.plan.addInterpreterContext(modified_context); + if (child.plan.isInitialized()) + { + addVirtualColumns(child, 
modified_query_info, common_processed_stage, table); + + /// Subordinary tables could have different but convertible types, like numeric types of different width. + /// We must return streams with structure equals to structure of Merge table. + convertAndFilterSourceStream(common_header, modified_query_info, nested_storage_snaphsot, aliases, row_policy_data_opt, context, child); + + for (const auto & filter_info : pushed_down_filters) + { + auto filter_step = std::make_unique( + child.plan.getCurrentDataStream(), + filter_info.actions->clone(), + filter_info.column_name, + filter_info.do_remove_column); + + child.plan.addStep(std::move(filter_step)); + } + + child.plan.optimize(QueryPlanOptimizationSettings::fromContext(modified_context)); + } + + res.emplace_back(std::move(child)); + } return res; } @@ -876,8 +888,8 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo const StorageID current_storage_id = storage->getStorageID(); SelectQueryInfo modified_query_info = query_info; - if (modified_query_info.optimized_prewhere_info && !modified_query_info.prewhere_info) - modified_query_info.prewhere_info = modified_query_info.optimized_prewhere_info; + + modified_query_info.merge_storage_snapshot = merge_storage_snapshot; if (modified_query_info.planner_context) modified_query_info.planner_context = std::make_shared(modified_context, modified_query_info.planner_context); @@ -964,7 +976,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo } PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, false /*use_column_identifier_as_action_node_name*/); - actions_visitor.visit(filter_actions_dag, column_node); + actions_visitor.visit(*filter_actions_dag, column_node); } column_names_as_aliases = filter_actions_dag->getRequiredColumnsNames(); if (column_names_as_aliases.empty()) @@ -1019,31 +1031,101 @@ bool recursivelyApplyToReadingSteps(QueryPlan::Node * node, const std::function< return ok; 
} -QueryPipelineBuilderPtr ReadFromMerge::createSources( - QueryPlan & plan, - const StorageSnapshotPtr & storage_snapshot_, +void ReadFromMerge::addVirtualColumns( + ChildPlan & child, SelectQueryInfo & modified_query_info, QueryProcessingStage::Enum processed_stage, - const Block & header, - const Aliases & aliases, - const RowPolicyDataOpt & row_policy_data_opt, - const StorageWithLockAndName & storage_with_lock, - bool concat_streams) const + const StorageWithLockAndName & storage_with_lock) const { - if (!plan.isInitialized()) - return std::make_unique(); - - QueryPipelineBuilderPtr builder; - - const auto & [database_name, storage, _, table_name] = storage_with_lock; + const auto & [database_name, _, storage, table_name] = storage_with_lock; bool allow_experimental_analyzer = context->getSettingsRef().allow_experimental_analyzer; - auto storage_stage - = storage->getQueryProcessingStage(context, processed_stage, storage_snapshot_, modified_query_info); - builder = plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); + /// Add virtual columns if we don't already have them. - if (processed_stage > storage_stage || (allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns)) + Block plan_header = child.plan.getCurrentDataStream().header; + + if (allow_experimental_analyzer) + { + String table_alias = modified_query_info.query_tree->as()->getJoinTree()->as()->getAlias(); + + String database_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? "_database" : table_alias + "._database"; + String table_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? 
"_table" : table_alias + "._table"; + + if (has_database_virtual_column && common_header.has(database_column) + && child.stage == QueryProcessingStage::FetchColumns && !plan_header.has(database_column)) + { + ColumnWithTypeAndName column; + column.name = database_column; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(database_name)); + + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), adding_column_dag); + child.plan.addStep(std::move(expression_step)); + plan_header = child.plan.getCurrentDataStream().header; + } + + if (has_table_virtual_column && common_header.has(table_column) + && child.stage == QueryProcessingStage::FetchColumns && !plan_header.has(table_column)) + { + ColumnWithTypeAndName column; + column.name = table_column; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(table_name)); + + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), adding_column_dag); + child.plan.addStep(std::move(expression_step)); + plan_header = child.plan.getCurrentDataStream().header; + } + } + else + { + if (has_database_virtual_column && common_header.has("_database") && !plan_header.has("_database")) + { + ColumnWithTypeAndName column; + column.name = "_database"; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(database_name)); + + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), adding_column_dag); + child.plan.addStep(std::move(expression_step)); + plan_header = child.plan.getCurrentDataStream().header; + } + + if (has_table_virtual_column && 
common_header.has("_table") && !plan_header.has("_table")) + { + ColumnWithTypeAndName column; + column.name = "_table"; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(table_name)); + + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), adding_column_dag); + child.plan.addStep(std::move(expression_step)); + plan_header = child.plan.getCurrentDataStream().header; + } + } +} + +QueryPipelineBuilderPtr ReadFromMerge::buildPipeline( + ChildPlan & child, + QueryProcessingStage::Enum processed_stage) const +{ + if (!child.plan.isInitialized()) + return nullptr; + + auto optimisation_settings = QueryPlanOptimizationSettings::fromContext(context); + /// All optimisations will be done at plans creation + optimisation_settings.optimize_plan = false; + auto builder = child.plan.buildQueryPipeline(optimisation_settings, BuildQueryPipelineSettings::fromContext(context)); + + if (!builder->initialized()) + return builder; + + bool allow_experimental_analyzer = context->getSettingsRef().allow_experimental_analyzer; + if (processed_stage > child.stage || (allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns)) { /** Materialization is needed, since from distributed storage the constants come materialized. * If you do not do this, different types (Const and non-Const) columns will be produced in different threads, @@ -1052,99 +1134,10 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( builder->addSimpleTransform([](const Block & stream_header) { return std::make_shared(stream_header); }); } - if (builder->initialized()) - { - if (concat_streams && builder->getNumStreams() > 1) - { - // It's possible to have many tables read from merge, resize(1) might open too many files at the same time. - // Using concat instead. 
- builder->addTransform(std::make_shared(builder->getHeader(), builder->getNumStreams())); - } - - /// Add virtual columns if we don't already have them. - - Block pipe_header = builder->getHeader(); - - if (allow_experimental_analyzer) - { - String table_alias = modified_query_info.query_tree->as()->getJoinTree()->as()->getAlias(); - - String database_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? "_database" : table_alias + "._database"; - String table_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? "_table" : table_alias + "._table"; - - if (has_database_virtual_column && common_header.has(database_column) - && storage_stage == QueryProcessingStage::FetchColumns && !pipe_header.has(database_column)) - { - ColumnWithTypeAndName column; - column.name = database_column; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(database_name)); - - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); - - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); - } - - if (has_table_virtual_column && common_header.has(table_column) - && storage_stage == QueryProcessingStage::FetchColumns && !pipe_header.has(table_column)) - { - ColumnWithTypeAndName column; - column.name = table_column; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(table_name)); - - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); - - 
builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); - } - } - else - { - if (has_database_virtual_column && common_header.has("_database") && !pipe_header.has("_database")) - { - ColumnWithTypeAndName column; - column.name = "_database"; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(database_name)); - - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); - } - - if (has_table_virtual_column && common_header.has("_table") && !pipe_header.has("_table")) - { - ColumnWithTypeAndName column; - column.name = "_table"; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(table_name)); - - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); - } - } - - /// Subordinary tables could have different but convertible types, like numeric types of different width. - /// We must return streams with structure equals to structure of Merge table. 
- convertAndFilterSourceStream( - header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder, storage_stage); - } - return builder; } -QueryPlan ReadFromMerge::createPlanForTable( +ReadFromMerge::ChildPlan ReadFromMerge::createPlanForTable( const StorageSnapshotPtr & storage_snapshot_, SelectQueryInfo & modified_query_info, QueryProcessingStage::Enum processed_stage, @@ -1181,35 +1174,14 @@ QueryPlan ReadFromMerge::createPlanForTable( if (real_column_names.empty()) real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot_->metadata->getColumns().getAllPhysical()).name); - StorageView * view = dynamic_cast(storage.get()); - if (!view || allow_experimental_analyzer) - { - storage->read(plan, - real_column_names, - storage_snapshot_, - modified_query_info, - modified_context, - processed_stage, - max_block_size, - UInt32(streams_num)); - } - else - { - /// For view storage, we need to rewrite the `modified_query_info.view_query` to optimize read. - /// The most intuitive way is to use InterpreterSelectQuery. - - /// Intercept the settings - modified_context->setSetting("max_threads", streams_num); - modified_context->setSetting("max_streams_to_max_threads_ratio", 1); - modified_context->setSetting("max_block_size", max_block_size); - - InterpreterSelectQuery interpreter(modified_query_info.query, - modified_context, - storage, - view->getInMemoryMetadataPtr(), - SelectQueryOptions(processed_stage)); - interpreter.buildQueryPlan(plan); - } + storage->read(plan, + real_column_names, + storage_snapshot_, + modified_query_info, + modified_context, + processed_stage, + max_block_size, + UInt32(streams_num)); if (!plan.isInitialized()) return {}; @@ -1228,7 +1200,10 @@ QueryPlan ReadFromMerge::createPlanForTable( if (allow_experimental_analyzer) { - InterpreterSelectQueryAnalyzer interpreter(modified_query_info.query_tree, + /// Converting query to AST because types might be different in the source table. 
+ /// Need to resolve types again. + auto ast = modified_query_info.query_tree->toAST(); + InterpreterSelectQueryAnalyzer interpreter(ast, modified_context, SelectQueryOptions(processed_stage)); @@ -1248,7 +1223,7 @@ QueryPlan ReadFromMerge::createPlanForTable( } } - return plan; + return ChildPlan{std::move(plan), storage_stage}; } ReadFromMerge::RowPolicyData::RowPolicyData(RowPolicyFilterPtr row_policy_filter_ptr, @@ -1306,12 +1281,10 @@ void ReadFromMerge::RowPolicyData::addStorageFilter(SourceStepWithFilter * step) step->addFilter(actions_dag, filter_column_name); } -void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPipelineBuilder & builder) const +void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPlan & plan) const { - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header, filter_actions, filter_column_name, true /* remove filter column */); - }); + auto filter_step = std::make_unique(plan.getCurrentDataStream(), actions_dag, filter_column_name, true /* remove filter column */); + plan.addStep(std::move(filter_step)); } StorageMerge::StorageListWithLocks ReadFromMerge::getSelectedTables( @@ -1490,13 +1463,12 @@ void ReadFromMerge::convertAndFilterSourceStream( const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, ContextPtr local_context, - QueryPipelineBuilder & builder, - QueryProcessingStage::Enum processed_stage) + ChildPlan & child) { - Block before_block_header = builder.getHeader(); + Block before_block_header = child.plan.getCurrentDataStream().header; auto storage_sample_block = snapshot->metadata->getSampleBlock(); - auto pipe_columns = builder.getHeader().getNamesAndTypesList(); + auto pipe_columns = before_block_header.getNamesAndTypesList(); if (local_context->getSettingsRef().allow_experimental_analyzer) { @@ -1513,19 +1485,14 @@ void ReadFromMerge::convertAndFilterSourceStream( query_analysis_pass.run(query_tree, local_context); PlannerActionsVisitor 
actions_visitor(modified_query_info.planner_context, false /*use_column_identifier_as_action_node_name*/); - const auto & nodes = actions_visitor.visit(actions_dag, query_tree); + const auto & nodes = actions_visitor.visit(*actions_dag, query_tree); if (nodes.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected to have 1 output but got {}", nodes.size()); actions_dag->addOrReplaceInOutputs(actions_dag->addAlias(*nodes.front(), alias.name)); - - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header, actions); - }); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), actions_dag); + child.plan.addStep(std::move(expression_step)); } } else @@ -1539,37 +1506,26 @@ void ReadFromMerge::convertAndFilterSourceStream( auto dag = std::make_shared(pipe_columns); auto actions_dag = expression_analyzer.getActionsDAG(true, false); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header, actions); - }); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), actions_dag); + child.plan.addStep(std::move(expression_step)); } } ActionsDAG::MatchColumnsMode convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Name; if (local_context->getSettingsRef().allow_experimental_analyzer - && (processed_stage != QueryProcessingStage::FetchColumns || dynamic_cast(&snapshot->storage) != nullptr)) + && (child.stage != QueryProcessingStage::FetchColumns || dynamic_cast(&snapshot->storage) != nullptr)) convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; if (row_policy_data_opt) - { - row_policy_data_opt->addFilterTransform(builder); - } + 
row_policy_data_opt->addFilterTransform(child.plan); - auto convert_actions_dag = ActionsDAG::makeConvertingActions(builder.getHeader().getColumnsWithTypeAndName(), + auto convert_actions_dag = ActionsDAG::makeConvertingActions(child.plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), header.getColumnsWithTypeAndName(), convert_actions_match_columns_mode); - auto actions = std::make_shared( - std::move(convert_actions_dag), - ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header, actions); - }); + auto expression_step = std::make_unique(child.plan.getCurrentDataStream(), convert_actions_dag); + child.plan.addStep(std::move(expression_step)); } const ReadFromMerge::StorageListWithLocks & ReadFromMerge::getSelectedTables() @@ -1606,29 +1562,14 @@ bool ReadFromMerge::requestReadingInOrder(InputOrderInfoPtr order_info_) return true; } -void ReadFromMerge::applyFilters(const QueryPlan & plan, const ActionDAGNodes & added_filter_nodes) const -{ - auto apply_filters = [&added_filter_nodes](ReadFromMergeTree & read_from_merge_tree) - { - for (const auto & node : added_filter_nodes.nodes) - read_from_merge_tree.addFilterFromParentStep(node); - - read_from_merge_tree.SourceStepWithFilter::applyFilters(); - return true; - }; - - recursivelyApplyToReadingSteps(plan.getRootNode(), apply_filters); -} - void ReadFromMerge::applyFilters(ActionDAGNodes added_filter_nodes) { + for (const auto & filter_info : pushed_down_filters) + added_filter_nodes.nodes.push_back(&filter_info.actions->findInOutputs(filter_info.column_name)); + SourceStepWithFilter::applyFilters(added_filter_nodes); filterTablesAndCreateChildrenPlans(); - - for (const auto & child_plan : *child_plans) - if (child_plan.plan.isInitialized()) - applyFilters(child_plan.plan, added_filter_nodes); } QueryPlanRawPtrs ReadFromMerge::getChildPlans() diff --git 
a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 735c8711a63..94b34256d02 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -165,7 +165,7 @@ public: QueryPlanRawPtrs getChildPlans() override; - void updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value) override; + void addFilter(FilterDAGInfo filter); private: const size_t required_max_block_size; @@ -221,7 +221,7 @@ private: /// Create explicit filter transform to exclude /// rows that are not conform to row level policy - void addFilterTransform(QueryPipelineBuilder &) const; + void addFilterTransform(QueryPlan &) const; private: std::string filter_column_name; // complex filter, may contain logic operations @@ -235,21 +235,21 @@ private: struct ChildPlan { QueryPlan plan; - Aliases table_aliases; - RowPolicyDataOpt row_policy_data_opt; + QueryProcessingStage::Enum stage; }; /// Store read plan for each child table. /// It's needed to guarantee lifetime for child steps to be the same as for this step (mainly for EXPLAIN PIPELINE). std::optional> child_plans; + /// Store filters pushed down from query plan optimization. Filters are added on top of child plans. 
+ std::vector pushed_down_filters; + std::vector createChildrenPlans(SelectQueryInfo & query_info_) const; void filterTablesAndCreateChildrenPlans(); - void applyFilters(const QueryPlan & plan, const ActionDAGNodes & added_filter_nodes) const; - - QueryPlan createPlanForTable( + ChildPlan createPlanForTable( const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, QueryProcessingStage::Enum processed_stage, @@ -260,16 +260,15 @@ private: ContextMutablePtr modified_context, size_t streams_num) const; - QueryPipelineBuilderPtr createSources( - QueryPlan & plan, - const StorageSnapshotPtr & storage_snapshot, + void addVirtualColumns( + ChildPlan & child, SelectQueryInfo & modified_query_info, QueryProcessingStage::Enum processed_stage, - const Block & header, - const Aliases & aliases, - const RowPolicyDataOpt & row_policy_data_opt, - const StorageWithLockAndName & storage_with_lock, - bool concat_streams = false) const; + const StorageWithLockAndName & storage_with_lock) const; + + QueryPipelineBuilderPtr buildPipeline( + ChildPlan & child, + QueryProcessingStage::Enum processed_stage) const; static void convertAndFilterSourceStream( const Block & header, @@ -278,15 +277,12 @@ private: const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, ContextPtr context, - QueryPipelineBuilder & builder, - QueryProcessingStage::Enum processed_stage); + ChildPlan & child); StorageMerge::StorageListWithLocks getSelectedTables( ContextPtr query_context, bool filter_by_database_virtual_column, bool filter_by_table_virtual_column) const; - - // static VirtualColumnsDescription createVirtuals(StoragePtr first_table); }; } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 27a76f4f21d..94d7a33d0dd 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -213,36 +213,13 @@ void StorageMergeTree::read( size_t max_block_size, size_t num_streams) { - if 
(local_context->canUseParallelReplicasOnInitiator() && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) + const auto & settings = local_context->getSettingsRef(); + /// reading step for parallel replicas with new analyzer is built in Planner, so don't do it here + if (local_context->canUseParallelReplicasOnInitiator() && settings.parallel_replicas_for_non_replicated_merge_tree + && !settings.allow_experimental_analyzer) { - ASTPtr modified_query_ast; - Block header; - if (local_context->getSettingsRef().allow_experimental_analyzer) - { - QueryTreeNodePtr modified_query_tree = query_info.query_tree->clone(); - rewriteJoinToGlobalJoin(modified_query_tree, local_context); - modified_query_tree = buildQueryTreeForShard(query_info.planner_context, modified_query_tree); - header = InterpreterSelectQueryAnalyzer::getSampleBlock( - modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); - modified_query_ast = queryNodeToDistributedSelectQuery(modified_query_tree); - } - else - { - const auto table_id = getStorageID(); - modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, - table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); - header - = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); - } - ClusterProxy::executeQueryWithParallelReplicas( - query_plan, - getStorageID(), - header, - processed_stage, - modified_query_ast, - local_context, - query_info.storage_limits); + query_plan, getStorageID(), processed_stage, query_info.query, local_context, query_info.storage_limits); } else { @@ -1292,6 +1269,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( if (command.type != MutationCommand::Type::DROP_COLUMN && command.type != MutationCommand::Type::DROP_INDEX && command.type != MutationCommand::Type::DROP_PROJECTION + && command.type != 
MutationCommand::Type::DROP_STATISTICS && command.type != MutationCommand::Type::RENAME_COLUMN) { commands_for_size_validation.push_back(command); @@ -1479,6 +1457,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign cleared_count += clearOldPartsFromFilesystem(); cleared_count += clearOldMutations(); cleared_count += clearEmptyParts(); + cleared_count += unloadPrimaryKeysOfOutdatedParts(); return cleared_count; /// TODO maybe take into account number of cleared objects when calculating backoff }, common_assignee_trigger, getStorageID()), /* need_trigger */ false); diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 9379cb5a1c6..a8713c61e4d 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -35,9 +35,12 @@ #include #include +#include +#include #include #include +#include #include #include @@ -106,28 +109,79 @@ ColumnsDescription StoragePostgreSQL::getTableStructureFromData( return ColumnsDescription{columns_info->columns}; } -Pipe StoragePostgreSQL::read( - const Names & column_names_, +namespace +{ + +class ReadFromPostgreSQL : public SourceStepWithFilter +{ +public: + ReadFromPostgreSQL( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + size_t max_block_size_, + String remote_table_schema_, + String remote_table_name_, + postgres::ConnectionHolderPtr connection_) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) + , logger(getLogger("ReadFromPostgreSQL")) + , max_block_size(max_block_size_) + , remote_table_schema(remote_table_schema_) + , remote_table_name(remote_table_name_) + , connection(std::move(connection_)) + { + } + + std::string getName() const override { return "ReadFromPostgreSQL"; } + + void initializePipeline(QueryPipelineBuilder & 
pipeline, const BuildQueryPipelineSettings &) override + { + std::optional transform_query_limit; + if (limit && !filter_actions_dag) + transform_query_limit = limit; + + /// Connection is already made to the needed database, so it should not be present in the query; + /// remote_table_schema is empty if it is not specified, will access only table_name. + String query = transformQueryForExternalDatabase( + query_info, + required_source_columns, + storage_snapshot->metadata->getColumns().getOrdinary(), + IdentifierQuotingStyle::DoubleQuotes, + LiteralEscapingStyle::PostgreSQL, + remote_table_schema, + remote_table_name, + context, + transform_query_limit); + LOG_TRACE(logger, "Query: {}", query); + + pipeline.init(Pipe(std::make_shared>(std::move(connection), query, getOutputStream().header, max_block_size))); + } + + LoggerPtr logger; + size_t max_block_size; + String remote_table_schema; + String remote_table_name; + postgres::ConnectionHolderPtr connection; +}; + +} + +void StoragePostgreSQL::read( + QueryPlan & query_plan, + const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info_, - ContextPtr context_, + SelectQueryInfo & query_info, + ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size_, + size_t max_block_size, size_t /*num_streams*/) { - storage_snapshot->check(column_names_); - - /// Connection is already made to the needed database, so it should not be present in the query; - /// remote_table_schema is empty if it is not specified, will access only table_name. 
- String query = transformQueryForExternalDatabase( - query_info_, - column_names_, - storage_snapshot->metadata->getColumns().getOrdinary(), - IdentifierQuotingStyle::DoubleQuotes, LiteralEscapingStyle::PostgreSQL, remote_table_schema, remote_table_name, context_); - LOG_TRACE(log, "Query: {}", query); + storage_snapshot->check(column_names); Block sample_block; - for (const String & column_name : column_names_) + for (const String & column_name : column_names) { auto column_data = storage_snapshot->metadata->getColumns().getPhysical(column_name); WhichDataType which(column_data.type); @@ -136,7 +190,17 @@ Pipe StoragePostgreSQL::read( sample_block.insert({ column_data.type, column_data.name }); } - return Pipe(std::make_shared>(pool->get(), query, sample_block, max_block_size_)); + auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + local_context, + sample_block, + max_block_size, + remote_table_schema, + remote_table_name, + pool->get()); + query_plan.addStep(std::move(reading)); } diff --git a/src/Storages/StoragePostgreSQL.h b/src/Storages/StoragePostgreSQL.h index 1ed4f7a7611..a8fa22f71b2 100644 --- a/src/Storages/StoragePostgreSQL.h +++ b/src/Storages/StoragePostgreSQL.h @@ -37,11 +37,12 @@ public: String getName() const override { return "PostgreSQL"; } - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, - ContextPtr context, + ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, size_t num_streams) override; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e18e66d7af9..a127384c03c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5461,7 +5461,8 @@ void StorageReplicatedMergeTree::read( /// For this you have to synchronously go to ZooKeeper. 
if (settings.select_sequential_consistency) readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); - else if (local_context->canUseParallelReplicasOnInitiator()) + /// reading step for parallel replicas with new analyzer is built in Planner, so don't do it here + else if (local_context->canUseParallelReplicasOnInitiator() && !settings.allow_experimental_analyzer) readParallelReplicasImpl(query_plan, column_names, query_info, local_context, processed_stage); else readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); @@ -5493,36 +5494,8 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl( ContextPtr local_context, QueryProcessingStage::Enum processed_stage) { - ASTPtr modified_query_ast; - Block header; - const auto table_id = getStorageID(); - - if (local_context->getSettingsRef().allow_experimental_analyzer) - { - QueryTreeNodePtr modified_query_tree = query_info.query_tree->clone(); - rewriteJoinToGlobalJoin(modified_query_tree, local_context); - modified_query_tree = buildQueryTreeForShard(query_info.planner_context, modified_query_tree); - - header = InterpreterSelectQueryAnalyzer::getSampleBlock( - modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); - modified_query_ast = queryNodeToDistributedSelectQuery(modified_query_tree); - } - else - { - modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, - table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); - header - = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); - } - ClusterProxy::executeQueryWithParallelReplicas( - query_plan, - table_id, - header, - processed_stage, - modified_query_ast, - local_context, - query_info.storage_limits); + query_plan, getStorageID(), processed_stage, 
query_info.query, local_context, query_info.storage_limits); } void StorageReplicatedMergeTree::readLocalImpl( @@ -5683,7 +5656,7 @@ std::optional StorageReplicatedMergeTree::distributedWriteFromClu { auto connection = std::make_shared( node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(), - node.user, node.password, SSHKey(), node.quota_key, node.cluster, node.cluster_secret, + node.user, node.password, SSHKey(), /*jwt*/"", node.quota_key, node.cluster, node.cluster_secret, "ParallelInsertSelectInititiator", node.compression, node.secure diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp deleted file mode 100644 index b767805f637..00000000000 --- a/src/Storages/StorageS3Settings.cpp +++ /dev/null @@ -1,315 +0,0 @@ -#include - -#include - -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int INVALID_SETTING_VALUE; -} - -S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const Settings & settings, bool validate_settings) -{ - updateFromSettings(settings, false); - if (validate_settings) - validate(); -} - -S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings( - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - const Settings & settings, - String setting_name_prefix, - bool validate_settings) - : PartUploadSettings(settings, validate_settings) -{ - String key = config_prefix + "." 
+ setting_name_prefix; - strict_upload_part_size = config.getUInt64(key + "strict_upload_part_size", strict_upload_part_size); - min_upload_part_size = config.getUInt64(key + "min_upload_part_size", min_upload_part_size); - max_upload_part_size = config.getUInt64(key + "max_upload_part_size", max_upload_part_size); - upload_part_size_multiply_factor = config.getUInt64(key + "upload_part_size_multiply_factor", upload_part_size_multiply_factor); - upload_part_size_multiply_parts_count_threshold = config.getUInt64(key + "upload_part_size_multiply_parts_count_threshold", upload_part_size_multiply_parts_count_threshold); - max_inflight_parts_for_one_file = config.getUInt64(key + "max_inflight_parts_for_one_file", max_inflight_parts_for_one_file); - max_part_number = config.getUInt64(key + "max_part_number", max_part_number); - max_single_part_upload_size = config.getUInt64(key + "max_single_part_upload_size", max_single_part_upload_size); - max_single_operation_copy_size = config.getUInt64(key + "max_single_operation_copy_size", max_single_operation_copy_size); - - /// This configuration is only applicable to s3. Other types of object storage are not applicable or have different meanings. 
- storage_class_name = config.getString(config_prefix + ".s3_storage_class", storage_class_name); - storage_class_name = Poco::toUpperInPlace(storage_class_name); - - if (validate_settings) - validate(); -} - -S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const NamedCollection & collection) -{ - strict_upload_part_size = collection.getOrDefault("strict_upload_part_size", strict_upload_part_size); - min_upload_part_size = collection.getOrDefault("min_upload_part_size", min_upload_part_size); - max_single_part_upload_size = collection.getOrDefault("max_single_part_upload_size", max_single_part_upload_size); - upload_part_size_multiply_factor = collection.getOrDefault("upload_part_size_multiply_factor", upload_part_size_multiply_factor); - upload_part_size_multiply_parts_count_threshold = collection.getOrDefault("upload_part_size_multiply_parts_count_threshold", upload_part_size_multiply_parts_count_threshold); - max_inflight_parts_for_one_file = collection.getOrDefault("max_inflight_parts_for_one_file", max_inflight_parts_for_one_file); - - /// This configuration is only applicable to s3. Other types of object storage are not applicable or have different meanings. 
- storage_class_name = collection.getOrDefault("s3_storage_class", storage_class_name); - storage_class_name = Poco::toUpperInPlace(storage_class_name); - - validate(); -} - -void S3Settings::RequestSettings::PartUploadSettings::updateFromSettings(const Settings & settings, bool if_changed) -{ - if (!if_changed || settings.s3_strict_upload_part_size.changed) - strict_upload_part_size = settings.s3_strict_upload_part_size; - - if (!if_changed || settings.s3_min_upload_part_size.changed) - min_upload_part_size = settings.s3_min_upload_part_size; - - if (!if_changed || settings.s3_max_upload_part_size.changed) - max_upload_part_size = settings.s3_max_upload_part_size; - - if (!if_changed || settings.s3_upload_part_size_multiply_factor.changed) - upload_part_size_multiply_factor = settings.s3_upload_part_size_multiply_factor; - - if (!if_changed || settings.s3_upload_part_size_multiply_parts_count_threshold.changed) - upload_part_size_multiply_parts_count_threshold = settings.s3_upload_part_size_multiply_parts_count_threshold; - - if (!if_changed || settings.s3_max_inflight_parts_for_one_file.changed) - max_inflight_parts_for_one_file = settings.s3_max_inflight_parts_for_one_file; - - if (!if_changed || settings.s3_max_single_part_upload_size.changed) - max_single_part_upload_size = settings.s3_max_single_part_upload_size; -} - -void S3Settings::RequestSettings::PartUploadSettings::validate() -{ - static constexpr size_t min_upload_part_size_limit = 5 * 1024 * 1024; - if (strict_upload_part_size && strict_upload_part_size < min_upload_part_size_limit) - throw Exception( - ErrorCodes::INVALID_SETTING_VALUE, - "Setting strict_upload_part_size has invalid value {} which is less than the s3 API limit {}", - ReadableSize(strict_upload_part_size), ReadableSize(min_upload_part_size_limit)); - - if (min_upload_part_size < min_upload_part_size_limit) - throw Exception( - ErrorCodes::INVALID_SETTING_VALUE, - "Setting min_upload_part_size has invalid value {} which is less than 
the s3 API limit {}", - ReadableSize(min_upload_part_size), ReadableSize(min_upload_part_size_limit)); - - static constexpr size_t max_upload_part_size_limit = 5ull * 1024 * 1024 * 1024; - if (max_upload_part_size > max_upload_part_size_limit) - throw Exception( - ErrorCodes::INVALID_SETTING_VALUE, - "Setting max_upload_part_size has invalid value {} which is greater than the s3 API limit {}", - ReadableSize(max_upload_part_size), ReadableSize(max_upload_part_size_limit)); - - if (max_single_part_upload_size > max_upload_part_size_limit) - throw Exception( - ErrorCodes::INVALID_SETTING_VALUE, - "Setting max_single_part_upload_size has invalid value {} which is grater than the s3 API limit {}", - ReadableSize(max_single_part_upload_size), ReadableSize(max_upload_part_size_limit)); - - if (max_single_operation_copy_size > max_upload_part_size_limit) - throw Exception( - ErrorCodes::INVALID_SETTING_VALUE, - "Setting max_single_operation_copy_size has invalid value {} which is grater than the s3 API limit {}", - ReadableSize(max_single_operation_copy_size), ReadableSize(max_upload_part_size_limit)); - - if (max_upload_part_size < min_upload_part_size) - throw Exception( - ErrorCodes::INVALID_SETTING_VALUE, - "Setting max_upload_part_size ({}) can't be less than setting min_upload_part_size {}", - ReadableSize(max_upload_part_size), ReadableSize(min_upload_part_size)); - - if (!upload_part_size_multiply_factor) - throw Exception( - ErrorCodes::INVALID_SETTING_VALUE, - "Setting upload_part_size_multiply_factor cannot be zero"); - - if (!upload_part_size_multiply_parts_count_threshold) - throw Exception( - ErrorCodes::INVALID_SETTING_VALUE, - "Setting upload_part_size_multiply_parts_count_threshold cannot be zero"); - - if (!max_part_number) - throw Exception( - ErrorCodes::INVALID_SETTING_VALUE, - "Setting max_part_number cannot be zero"); - - static constexpr size_t max_part_number_limit = 10000; - if (max_part_number > max_part_number_limit) - throw Exception( - 
ErrorCodes::INVALID_SETTING_VALUE, - "Setting max_part_number has invalid value {} which is grater than the s3 API limit {}", - ReadableSize(max_part_number), ReadableSize(max_part_number_limit)); - - size_t maybe_overflow; - if (common::mulOverflow(max_upload_part_size, upload_part_size_multiply_factor, maybe_overflow)) - throw Exception( - ErrorCodes::INVALID_SETTING_VALUE, - "Setting upload_part_size_multiply_factor is too big ({}). " - "Multiplication to max_upload_part_size ({}) will cause integer overflow", - ReadableSize(max_part_number), ReadableSize(max_part_number_limit)); - - std::unordered_set storage_class_names {"STANDARD", "INTELLIGENT_TIERING"}; - if (!storage_class_name.empty() && !storage_class_names.contains(storage_class_name)) - throw Exception( - ErrorCodes::INVALID_SETTING_VALUE, - "Setting storage_class has invalid value {} which only supports STANDARD and INTELLIGENT_TIERING", - storage_class_name); - - /// TODO: it's possible to set too small limits. We can check that max possible object size is not too small. 
-} - - -S3Settings::RequestSettings::RequestSettings(const Settings & settings, bool validate_settings) - : upload_settings(settings, validate_settings) -{ - updateFromSettingsImpl(settings, false); -} - -S3Settings::RequestSettings::RequestSettings(const NamedCollection & collection) - : upload_settings(collection) -{ - max_single_read_retries = collection.getOrDefault("max_single_read_retries", max_single_read_retries); - max_connections = collection.getOrDefault("max_connections", max_connections); - list_object_keys_size = collection.getOrDefault("list_object_keys_size", list_object_keys_size); - allow_native_copy = collection.getOrDefault("allow_native_copy", allow_native_copy); - throw_on_zero_files_match = collection.getOrDefault("throw_on_zero_files_match", throw_on_zero_files_match); -} - -S3Settings::RequestSettings::RequestSettings( - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - const Settings & settings, - String setting_name_prefix, - bool validate_settings) - : upload_settings(config, config_prefix, settings, setting_name_prefix, validate_settings) -{ - String key = config_prefix + "." 
+ setting_name_prefix; - max_single_read_retries = config.getUInt64(key + "max_single_read_retries", settings.s3_max_single_read_retries); - max_connections = config.getUInt64(key + "max_connections", settings.s3_max_connections); - check_objects_after_upload = config.getBool(key + "check_objects_after_upload", settings.s3_check_objects_after_upload); - list_object_keys_size = config.getUInt64(key + "list_object_keys_size", settings.s3_list_object_keys_size); - allow_native_copy = config.getBool(key + "allow_native_copy", allow_native_copy); - throw_on_zero_files_match = config.getBool(key + "throw_on_zero_files_match", settings.s3_throw_on_zero_files_match); - retry_attempts = config.getUInt64(key + "retry_attempts", settings.s3_retry_attempts); - request_timeout_ms = config.getUInt64(key + "request_timeout_ms", settings.s3_request_timeout_ms); - - /// NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, - /// which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. - if (UInt64 max_get_rps = config.getUInt64(key + "max_get_rps", settings.s3_max_get_rps)) - { - size_t default_max_get_burst = settings.s3_max_get_burst - ? settings.s3_max_get_burst - : (Throttler::default_burst_seconds * max_get_rps); - - size_t max_get_burst = config.getUInt64(key + "max_get_burst", default_max_get_burst); - - get_request_throttler = std::make_shared(max_get_rps, max_get_burst); - } - if (UInt64 max_put_rps = config.getUInt64(key + "max_put_rps", settings.s3_max_put_rps)) - { - size_t default_max_put_burst = settings.s3_max_put_burst - ? 
settings.s3_max_put_burst - : (Throttler::default_burst_seconds * max_put_rps); - - size_t max_put_burst = config.getUInt64(key + "max_put_burst", default_max_put_burst); - - put_request_throttler = std::make_shared(max_put_rps, max_put_burst); - } -} - -void S3Settings::RequestSettings::updateFromSettingsImpl(const Settings & settings, bool if_changed) -{ - if (!if_changed || settings.s3_max_single_read_retries.changed) - max_single_read_retries = settings.s3_max_single_read_retries; - - if (!if_changed || settings.s3_max_connections.changed) - max_connections = settings.s3_max_connections; - - if (!if_changed || settings.s3_check_objects_after_upload.changed) - check_objects_after_upload = settings.s3_check_objects_after_upload; - - if (!if_changed || settings.s3_max_unexpected_write_error_retries.changed) - max_unexpected_write_error_retries = settings.s3_max_unexpected_write_error_retries; - - if (!if_changed || settings.s3_list_object_keys_size.changed) - list_object_keys_size = settings.s3_list_object_keys_size; - - if ((!if_changed || settings.s3_max_get_rps.changed || settings.s3_max_get_burst.changed) && settings.s3_max_get_rps) - get_request_throttler = std::make_shared( - settings.s3_max_get_rps, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * settings.s3_max_get_rps); - - if ((!if_changed || settings.s3_max_put_rps.changed || settings.s3_max_put_burst.changed) && settings.s3_max_put_rps) - put_request_throttler = std::make_shared( - settings.s3_max_put_rps, settings.s3_max_put_burst ? 
settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps); - - if (!if_changed || settings.s3_throw_on_zero_files_match.changed) - throw_on_zero_files_match = settings.s3_throw_on_zero_files_match; - - if (!if_changed || settings.s3_retry_attempts.changed) - retry_attempts = settings.s3_retry_attempts; - - if (!if_changed || settings.s3_request_timeout_ms.changed) - request_timeout_ms = settings.s3_request_timeout_ms; -} - -void S3Settings::RequestSettings::updateFromSettingsIfChanged(const Settings & settings) -{ - updateFromSettingsImpl(settings, true); - upload_settings.updateFromSettings(settings, true); -} - -void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings) -{ - std::lock_guard lock(mutex); - s3_settings.clear(); - if (!config.has(config_elem)) - return; - - Poco::Util::AbstractConfiguration::Keys config_keys; - config.keys(config_elem, config_keys); - - for (const String & key : config_keys) - { - if (config.has(config_elem + "." + key + ".endpoint")) - { - auto endpoint = config.getString(config_elem + "." + key + ".endpoint"); - auto auth_settings = S3::AuthSettings::loadFromConfig(config_elem + "." + key, config); - S3Settings::RequestSettings request_settings(config, config_elem + "." + key, settings); - - s3_settings.emplace(endpoint, S3Settings{std::move(auth_settings), std::move(request_settings)}); - } - } -} - -std::optional StorageS3Settings::getSettings(const String & endpoint, const String & user, bool ignore_user) const -{ - std::lock_guard lock(mutex); - auto next_prefix_setting = s3_settings.upper_bound(endpoint); - - /// Linear time algorithm may be replaced with logarithmic with prefix tree map. 
- for (auto possible_prefix_setting = next_prefix_setting; possible_prefix_setting != s3_settings.begin();) - { - std::advance(possible_prefix_setting, -1); - const auto & [endpoint_prefix, settings] = *possible_prefix_setting; - if (endpoint.starts_with(endpoint_prefix) && (ignore_user || settings.auth_settings.canBeUsedByUser(user))) - return possible_prefix_setting->second; - } - - return {}; -} - -} diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h deleted file mode 100644 index c3bc8aa6ed6..00000000000 --- a/src/Storages/StorageS3Settings.h +++ /dev/null @@ -1,122 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace Poco::Util -{ -class AbstractConfiguration; -} - -namespace DB -{ - -struct Settings; -class NamedCollection; - -struct S3Settings -{ - struct RequestSettings - { - struct PartUploadSettings - { - size_t strict_upload_part_size = 0; - size_t min_upload_part_size = 16 * 1024 * 1024; - size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; - size_t upload_part_size_multiply_factor = 2; - size_t upload_part_size_multiply_parts_count_threshold = 500; - size_t max_inflight_parts_for_one_file = 20; - size_t max_part_number = 10000; - size_t max_single_part_upload_size = 32 * 1024 * 1024; - size_t max_single_operation_copy_size = 5ULL * 1024 * 1024 * 1024; - String storage_class_name; - - void updateFromSettings(const Settings & settings, bool if_changed); - void validate(); - - private: - PartUploadSettings() = default; - explicit PartUploadSettings(const Settings & settings, bool validate_settings = true); - explicit PartUploadSettings(const NamedCollection & collection); - PartUploadSettings( - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - const Settings & settings, - String setting_name_prefix = {}, - bool validate_settings = true); - - friend struct RequestSettings; - }; - - private: - PartUploadSettings 
upload_settings = {}; - - public: - size_t max_single_read_retries = 4; - size_t max_connections = 1024; - bool check_objects_after_upload = false; - size_t max_unexpected_write_error_retries = 4; - size_t list_object_keys_size = 1000; - ThrottlerPtr get_request_throttler; - ThrottlerPtr put_request_throttler; - size_t retry_attempts = 10; - size_t request_timeout_ms = 30000; - bool allow_native_copy = true; - - bool throw_on_zero_files_match = false; - - const PartUploadSettings & getUploadSettings() const { return upload_settings; } - PartUploadSettings & getUploadSettings() { return upload_settings; } - - void setStorageClassName(const String & storage_class_name) { upload_settings.storage_class_name = storage_class_name; } - - RequestSettings() = default; - explicit RequestSettings(const Settings & settings, bool validate_settings = true); - explicit RequestSettings(const NamedCollection & collection); - - /// What's the setting_name_prefix, and why do we need it? - /// There are (at least) two config sections where s3 settings can be specified: - /// * settings for s3 disk (clickhouse/storage_configuration/disks) - /// * settings for s3 storage (clickhouse/s3), which are also used for backups - /// Even though settings are the same, in case of s3 disk they are prefixed with "s3_" - /// ("s3_max_single_part_upload_size"), but in case of s3 storage they are not - /// ( "max_single_part_upload_size"). Why this happened is a complete mystery to me. - RequestSettings( - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - const Settings & settings, - String setting_name_prefix = {}, - bool validate_settings = true); - - void updateFromSettingsIfChanged(const Settings & settings); - - private: - void updateFromSettingsImpl(const Settings & settings, bool if_changed); - }; - - S3::AuthSettings auth_settings; - RequestSettings request_settings; -}; - -/// Settings for the StorageS3. 
-class StorageS3Settings -{ -public: - void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings); - - std::optional getSettings(const String & endpoint, const String & user, bool ignore_user = false) const; - -private: - mutable std::mutex mutex; - std::map s3_settings; -}; - -} diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index a8c8e81e23d..5b7f9fc0ac2 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -97,8 +97,7 @@ void SetOrJoinSink::onFinish() if (persistent) { backup_stream.flush(); - compressed_backup_buf.next(); - backup_buf->next(); + compressed_backup_buf.finalize(); backup_buf->finalize(); table.disk->replaceFile(fs::path(backup_tmp_path) / backup_file_name, fs::path(backup_path) / backup_file_name); diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index f0c5103d657..8df87d6290f 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -207,7 +207,10 @@ public: /// Rollback partial writes. /// No more writing. + data_out->cancel(); data_out.reset(); + + data_out_compressed->cancel(); data_out_compressed.reset(); /// Truncate files to the older sizes. @@ -233,8 +236,7 @@ public: if (done) return; - data_out->next(); - data_out_compressed->next(); + data_out->finalize(); data_out_compressed->finalize(); /// Save the new indices. 
@@ -494,8 +496,7 @@ void StorageStripeLog::saveIndices(const WriteLock & /* already locked for writi for (size_t i = start; i != num_indices; ++i) indices.blocks[i].write(*index_out); - index_out->next(); - index_out_compressed->next(); + index_out->finalize(); index_out_compressed->finalize(); num_indices_saved = num_indices; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 8d1c6933503..895da028fc2 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -411,7 +411,12 @@ Chunk StorageURLSource::generate() if (input_format) chunk_size = input_format->getApproxBytesReadForChunk(); progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, curr_uri.getPath(), current_file_size); + VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( + chunk, requested_virtual_columns, + { + .path = curr_uri.getPath(), + .size = current_file_size + }); return chunk; } @@ -571,31 +576,25 @@ void StorageURLSink::consume(Chunk chunk) void StorageURLSink::onCancel() { std::lock_guard lock(cancel_mutex); - finalize(); + cancelBuffers(); + releaseBuffers(); cancelled = true; } -void StorageURLSink::onException(std::exception_ptr exception) +void StorageURLSink::onException(std::exception_ptr) { std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } + cancelBuffers(); + releaseBuffers(); } void StorageURLSink::onFinish() { std::lock_guard lock(cancel_mutex); - finalize(); + finalizeBuffers(); } -void StorageURLSink::finalize() +void StorageURLSink::finalizeBuffers() { if (!writer) return; @@ -604,20 +603,29 @@ void StorageURLSink::finalize() { writer->finalize(); writer->flush(); - write_buf->finalize(); } catch (...) { /// Stop ParallelFormattingOutputFormat correctly. 
- release(); + releaseBuffers(); throw; } + + write_buf->finalize(); } -void StorageURLSink::release() +void StorageURLSink::releaseBuffers() { writer.reset(); - write_buf->finalize(); + write_buf.reset(); +} + +void StorageURLSink::cancelBuffers() +{ + if (writer) + writer->cancel(); + if (write_buf) + write_buf->cancel(); } class PartitionedStorageURLSink : public PartitionedSink diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index f550ccb2bc4..3090f8db12e 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -257,8 +257,10 @@ public: void onFinish() override; private: - void finalize(); - void release(); + void finalizeBuffers(); + void releaseBuffers(); + void cancelBuffers(); + std::unique_ptr write_buf; OutputFormatPtr writer; std::mutex cancel_mutex; diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index cb8d5caa50c..160c8d6270e 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -54,6 +54,10 @@ void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr co if (auto database_cluster = replicated->tryGetCluster()) writeCluster(res_columns, {name_and_database.first, database_cluster}, replicated->tryGetAreReplicasActive(database_cluster)); + + if (auto database_cluster = replicated->tryGetAllGroupsCluster()) + writeCluster(res_columns, {DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX + name_and_database.first, database_cluster}, + replicated->tryGetAreReplicasActive(database_cluster)); } } } diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 49da1eba9ec..8dd8d3b6154 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -298,7 +298,7 @@ private: ClientInfo::Interface client_info_interface; size_t db_table_num = 0; size_t total_tables; - std::shared_ptr access; + 
std::shared_ptr access; bool need_to_check_access_for_tables; String query_id; std::chrono::milliseconds lock_acquire_timeout; diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index b42b070d518..9201eef185f 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -194,6 +194,7 @@ const char * auto_contributors[] { "Artem Gavrilov", "Artem Hnilov", "Artem Konovalov", + "Artem Mustafin", "Artem Pershin", "Artem Streltsov", "Artem Zuikov", @@ -307,6 +308,7 @@ const char * auto_contributors[] { "Daniil Ivanik", "Daniil Rubin", "Danila Kutenin", + "Danila Puzov", "Daniël van Eeden", "Dao", "Dao Minh Thuc", @@ -417,6 +419,7 @@ const char * auto_contributors[] { "Filippov Denis", "Fille", "Flowyi", + "Francesco Ciocchetti", "Francisco Barón", "Francisco Javier Jurado Moreno", "Frank Chen", @@ -449,6 +452,7 @@ const char * auto_contributors[] { "Gleb-Tretyakov", "GoGoWen2021", "Gregory", + "Grigorii Sokolik", "Grigory", "Grigory Buteyko", "Grigory Pervakov", @@ -464,6 +468,7 @@ const char * auto_contributors[] { "Hamoon", "Han Fei", "Han Shukai", + "HappenLee", "Harry Lee", "Harry-Lee", "HarryLeeIBM", @@ -627,6 +632,7 @@ const char * auto_contributors[] { "Kostiantyn Storozhuk", "Kozlov Ivan", "KrJin", + "Kris Buytaert", "Krisztián Szűcs", "Kruglov Pavel", "Krzysztof Góralski", @@ -644,6 +650,7 @@ const char * auto_contributors[] { "Latysheva Alexandra", "Laurie Li", "LaurieLY", + "Lee sungju", "Lemore", "Leonardo Cecchi", "Leonardo Maciel", @@ -770,6 +777,7 @@ const char * auto_contributors[] { "Mikhail Filimonov", "Mikhail Fursov", "Mikhail Gaidamaka", + "Mikhail Gorshkov", "Mikhail Guzov", "Mikhail Korotov", "Mikhail Koviazin", @@ -904,11 +912,13 @@ const char * auto_contributors[] { "Petr Vasilev", "Pham Anh Tuan", "Philip Hallstrom", + "Philipp Schreiber", "Philippe Ombredanne", "PigInCloud", 
"Potya", "Pradeep Chhetri", "Prashant Shahi", + "Pratima Patel", "Priyansh Agrawal", "Pxl", "Pysaoke", @@ -978,6 +988,7 @@ const char * auto_contributors[] { "Samuel Colvin", "San", "Sanjam Panda", + "Sariel", "Saulius Valatka", "Sean Haynes", "Sean Lafferty", @@ -1067,6 +1078,7 @@ const char * auto_contributors[] { "TABLUM.IO", "TAC", "TCeason", + "TTPO100AJIEX", "Tagir Kuskarov", "Tai White", "Taleh Zaliyev", @@ -1089,6 +1101,7 @@ const char * auto_contributors[] { "Tiaonmmn", "Tigran Khudaverdyan", "Tim Liou", + "Tim MacDonald", "Tim Windelschmidt", "Timur Magomedov", "Timur Solodovnikov", @@ -1201,6 +1214,7 @@ const char * auto_contributors[] { "Xiaofei Hu", "Xin Wang", "Xoel Lopez Barata", + "Xu Jia", "Xudong Zhang", "Y Lu", "Yakko Majuri", @@ -1237,6 +1251,7 @@ const char * auto_contributors[] { "Yusuke Tanaka", "Zach Naimon", "Zheng Miao", + "ZhiHong Zhang", "ZhiYong Wang", "Zhichang Yu", "Zhichun Wu", @@ -1276,6 +1291,7 @@ const char * auto_contributors[] { "alexeyerm", "alexeypavlenko", "alfredlu", + "allegrinisante", "amesaru", "amoschen", "amudong", @@ -1287,6 +1303,7 @@ const char * auto_contributors[] { "anneji", "anneji-dev", "annvsh", + "anonymous", "anrodigina", "antikvist", "anton", @@ -1346,6 +1363,7 @@ const char * auto_contributors[] { "chenxing-xc", "chenxing.xc", "chertus", + "chloro", "chou.fan", "christophe.kalenzaga", "clarkcaoliu", @@ -1458,6 +1476,7 @@ const char * auto_contributors[] { "gyuton", "hanqf-git", "hao.he", + "haohang", "hardstep33", "hchen9", "hcz", @@ -1479,6 +1498,7 @@ const char * auto_contributors[] { "iammagicc", "ianton-ru", "ice1x", + "iceFireser", "idfer", "ifinik", "igomac", @@ -1642,6 +1662,7 @@ const char * auto_contributors[] { "mo-avatar", "mochi", "monchickey", + "morning-color", "morty", "moscas", "mosinnik", @@ -1695,6 +1716,7 @@ const char * auto_contributors[] { "philip.han", "pingyu", "pkubaj", + "pn", "potya", "pppeace", "presto53", @@ -1742,6 +1764,7 @@ const char * auto_contributors[] { "sanjam", 
"santaux", "santrancisco", + "sarielwxm", "satanson", "save-my-heart", "sdk2", @@ -1846,6 +1869,7 @@ const char * auto_contributors[] { "whysage", "wineternity", "woodlzm", + "wudidapaopao", "wuxiaobai24", "wxybear", "wzl", @@ -1860,6 +1884,7 @@ const char * auto_contributors[] { "xleoken", "xlwh", "xmy", + "xogoodnow", "xuelei", "xuzifu666", "yakkomajuri", diff --git a/src/Storages/System/StorageSystemDashboards.cpp b/src/Storages/System/StorageSystemDashboards.cpp index 9682fbc74a1..5faa37d951e 100644 --- a/src/Storages/System/StorageSystemDashboards.cpp +++ b/src/Storages/System/StorageSystemDashboards.cpp @@ -212,6 +212,20 @@ FROM merge('system', '^asynchronous_metric_log') WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32} AND metric = 'MaxPartCountForPartition' GROUP BY t ORDER BY t WITH FILL STEP {rounding:UInt32} +)EOQ") } + }, + { + { "dashboard", "Overview" }, + { "title", "Concurrent network connections" }, + { "query", trim(R"EOQ( +SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, + sum(CurrentMetric_TCPConnection) AS TCP_Connections, + sum(CurrentMetric_MySQLConnection) AS MySQL_Connections, + sum(CurrentMetric_HTTPConnection) AS HTTP_Connections +FROM merge('system', '^metric_log') +WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32} +GROUP BY t +ORDER BY t WITH FILL STEP {rounding:UInt32} )EOQ") } }, /// Default dashboard for ClickHouse Cloud @@ -349,6 +363,11 @@ ORDER BY t WITH FILL STEP {rounding:UInt32} { "dashboard", "Cloud overview" }, { "title", "Network send bytes/sec" }, { "query", "SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, avg(value)\nFROM (\n SELECT event_time, sum(value) AS value\n FROM clusterAllReplicas(default, merge('system', '^asynchronous_metric_log'))\n WHERE event_date >= toDate(now() - {seconds:UInt32})\n AND event_time >= now() - {seconds:UInt32}\n AND metric LIKE 
'NetworkSendBytes%'\n GROUP BY event_time)\nGROUP BY t\nORDER BY t WITH FILL STEP {rounding:UInt32} SETTINGS skip_unavailable_shards = 1" } + }, + { + { "dashboard", "Cloud overview" }, + { "title", "Concurrent network connections" }, + { "query", "SELECT toStartOfInterval(event_time, INTERVAL {rounding:UInt32} SECOND)::INT AS t, max(TCP_Connections), max(MySQL_Connections), max(HTTP_Connections) FROM (SELECT event_time, sum(CurrentMetric_TCPConnection) AS TCP_Connections, sum(CurrentMetric_MySQLConnection) AS MySQL_Connections, sum(CurrentMetric_HTTPConnection) AS HTTP_Connections FROM clusterAllReplicas(default, merge('system', '^metric_log')) WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32} GROUP BY event_time) GROUP BY t ORDER BY t WITH FILL STEP {rounding:UInt32} SETTINGS skip_unavailable_shards = 1" } } }; diff --git a/src/Storages/System/StorageSystemNamedCollections.cpp b/src/Storages/System/StorageSystemNamedCollections.cpp index 0836560dff0..e98ea155f30 100644 --- a/src/Storages/System/StorageSystemNamedCollections.cpp +++ b/src/Storages/System/StorageSystemNamedCollections.cpp @@ -33,7 +33,7 @@ void StorageSystemNamedCollections::fillData(MutableColumns & res_columns, Conte { const auto & access = context->getAccess(); - NamedCollectionUtils::loadIfNot(); + NamedCollectionFactory::instance().loadIfNot(); auto collections = NamedCollectionFactory::instance().getAll(); for (const auto & [name, collection] : collections) diff --git a/src/Storages/System/StorageSystemS3Queue.cpp b/src/Storages/System/StorageSystemS3Queue.cpp index 637182067f2..a1c9380f616 100644 --- a/src/Storages/System/StorageSystemS3Queue.cpp +++ b/src/Storages/System/StorageSystemS3Queue.cpp @@ -11,9 +11,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include @@ -26,12 +26,12 @@ ColumnsDescription StorageSystemS3Queue::getColumnsDescription() return ColumnsDescription { {"zookeeper_path", 
std::make_shared(), "Path in zookeeper to S3Queue metadata"}, + {"file_path", std::make_shared(), "File path of a file which is being processed by S3Queue"}, {"file_name", std::make_shared(), "File name of a file which is being processed by S3Queue"}, {"rows_processed", std::make_shared(), "Currently processed number of rows"}, {"status", std::make_shared(), "Status of processing: Processed, Processing, Failed"}, {"processing_start_time", std::make_shared(std::make_shared()), "Time at which processing of the file started"}, {"processing_end_time", std::make_shared(std::make_shared()), "Time at which processing of the file ended"}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "Profile events collected during processing of the file"}, {"exception", std::make_shared(), "Exception which happened during processing"}, }; } @@ -43,13 +43,14 @@ StorageSystemS3Queue::StorageSystemS3Queue(const StorageID & table_id_) void StorageSystemS3Queue::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { - for (const auto & [zookeeper_path, metadata] : S3QueueMetadataFactory::instance().getAll()) + for (const auto & [zookeeper_path, metadata] : ObjectStorageQueueMetadataFactory::instance().getAll()) { - for (const auto & [file_name, file_status] : metadata->getFileStatuses()) + for (const auto & [file_path, file_status] : metadata->getFileStatuses()) { size_t i = 0; res_columns[i++]->insert(zookeeper_path); - res_columns[i++]->insert(file_name); + res_columns[i++]->insert(file_path); + res_columns[i++]->insert(std::filesystem::path(file_path).filename().string()); res_columns[i++]->insert(file_status->processed_rows.load()); res_columns[i++]->insert(magic_enum::enum_name(file_status->state.load())); @@ -63,8 +64,6 @@ void StorageSystemS3Queue::fillData(MutableColumns & res_columns, ContextPtr, co else res_columns[i++]->insertDefault(); - 
ProfileEvents::dumpToMapColumn(file_status->profile_counters.getPartiallyAtomicSnapshot(), res_columns[i++].get(), true); - res_columns[i++]->insert(file_status->getException()); } } diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp index 651ca815420..339a59e88a5 100644 --- a/src/Storages/System/StorageSystemScheduler.cpp +++ b/src/Storages/System/StorageSystemScheduler.cpp @@ -12,7 +12,6 @@ #include #include #include -#include "Common/Scheduler/ResourceRequest.h" namespace DB diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp index 2e848f68850..d242b6de4ec 100644 --- a/src/Storages/System/StorageSystemServerSettings.cpp +++ b/src/Storages/System/StorageSystemServerSettings.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -81,7 +82,11 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context {"uncompressed_cache_size", {std::to_string(context->getUncompressedCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}}, {"index_mark_cache_size", {std::to_string(context->getIndexMarkCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}}, {"index_uncompressed_cache_size", {std::to_string(context->getIndexUncompressedCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}}, - {"mmap_cache_size", {std::to_string(context->getMMappedFileCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}} + {"mmap_cache_size", {std::to_string(context->getMMappedFileCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}}, + + {"merge_workload", {context->getMergeWorkload(), ChangeableWithoutRestart::Yes}}, + {"mutation_workload", {context->getMutationWorkload(), ChangeableWithoutRestart::Yes}}, + {"config_reload_interval_ms", {std::to_string(context->getConfigReloaderInterval()), ChangeableWithoutRestart::Yes}} }; if (context->areBackgroundExecutorsInitialized()) diff --git 
a/src/Storages/System/StorageSystemSettingsChanges.cpp b/src/Storages/System/StorageSystemSettingsChanges.cpp index de47ec52031..d6c83870741 100644 --- a/src/Storages/System/StorageSystemSettingsChanges.cpp +++ b/src/Storages/System/StorageSystemSettingsChanges.cpp @@ -26,6 +26,7 @@ ColumnsDescription StorageSystemSettingsChanges::getColumnsDescription() void StorageSystemSettingsChanges::fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const { + const auto & settings_changes_history = getSettingsChangesHistory(); for (auto it = settings_changes_history.rbegin(); it != settings_changes_history.rend(); ++it) { res_columns[0]->insert(it->first.toString()); diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index 0c34f04844d..541bd852140 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -17,6 +17,9 @@ #include #include #include +#include +#include "base/types.h" +#include #include @@ -142,10 +145,19 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte } else if (auth_data.getType() == AuthenticationType::SSL_CERTIFICATE) { - Poco::JSON::Array::Ptr arr = new Poco::JSON::Array(); - for (const auto & common_name : auth_data.getSSLCertificateCommonNames()) - arr->add(common_name); - auth_params_json.set("common_names", arr); + Poco::JSON::Array::Ptr common_names = new Poco::JSON::Array(); + Poco::JSON::Array::Ptr subject_alt_names = new Poco::JSON::Array(); + + const auto & subjects = auth_data.getSSLCertificateSubjects(); + for (const String & subject : subjects.at(SSLCertificateSubjects::Type::CN)) + common_names->add(subject); + for (const String & subject : subjects.at(SSLCertificateSubjects::Type::SAN)) + subject_alt_names->add(subject); + + if (common_names->size() > 0) + auth_params_json.set("common_names", common_names); + if (subject_alt_names->size() > 0) + 
auth_params_json.set("subject_alt_names", subject_alt_names); } std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index a48b109fbbe..0720a2f24d9 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -16,7 +16,9 @@ namespace struct ZerosState { + explicit ZerosState(UInt64 limit) : add_total_rows(limit) { } std::atomic num_generated_rows = 0; + std::atomic add_total_rows = 0; }; using ZerosStatePtr = std::shared_ptr; @@ -42,10 +44,13 @@ protected: auto column_ptr = column; size_t column_size = column_ptr->size(); - if (state) + UInt64 total_rows = state->add_total_rows.fetch_and(0); + if (total_rows) + addTotalRowsApprox(total_rows); + + if (limit) { auto generated_rows = state->num_generated_rows.fetch_add(column_size, std::memory_order_acquire); - if (generated_rows >= limit) return {}; @@ -103,36 +108,25 @@ Pipe StorageSystemZeros::read( { storage_snapshot->check(column_names); - bool use_multiple_streams = multithreaded; + UInt64 query_limit = limit ? *limit : 0; + if (query_info.trivial_limit) + query_limit = query_limit ? 
std::min(query_limit, query_info.trivial_limit) : query_info.trivial_limit; - if (limit && *limit < max_block_size) - { - max_block_size = static_cast(*limit); - use_multiple_streams = false; - } + if (query_limit && query_limit < max_block_size) + max_block_size = query_limit; - if (!use_multiple_streams) + if (!multithreaded) num_streams = 1; + else if (query_limit && num_streams * max_block_size > query_limit) + /// We want to avoid spawning more streams than necessary + num_streams = std::min(num_streams, static_cast(((query_limit + max_block_size - 1) / max_block_size))); + + ZerosStatePtr state = std::make_shared(query_limit); Pipe res; - - ZerosStatePtr state; - - if (limit) - state = std::make_shared(); - for (size_t i = 0; i < num_streams; ++i) { - auto source = std::make_shared(max_block_size, limit ? *limit : 0, state); - - if (i == 0) - { - if (limit) - source->addTotalRowsApprox(*limit); - else if (query_info.limit) - source->addTotalRowsApprox(query_info.limit); - } - + auto source = std::make_shared(max_block_size, query_limit, state); res.addSource(std::move(source)); } diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.cpp b/src/Storages/System/StorageSystemZooKeeperConnection.cpp index 950e20512c0..ec29b84dac3 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.cpp +++ b/src/Storages/System/StorageSystemZooKeeperConnection.cpp @@ -36,7 +36,8 @@ ColumnsDescription StorageSystemZooKeeperConnection::getColumnsDescription() /* 9 */ {"xid", std::make_shared(), "XID of the current session."}, /* 10*/ {"enabled_feature_flags", std::make_shared(std::move(feature_flags_enum)), "Feature flags which are enabled. Only applicable to ClickHouse Keeper." 
- } + }, + /* 11*/ {"availability_zone", std::make_shared(), "Availability zone"}, }; } @@ -85,6 +86,7 @@ void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, Co columns[8]->insert(zookeeper->getClientID()); columns[9]->insert(zookeeper->getConnectionXid()); add_enabled_feature_flags(zookeeper); + columns[11]->insert(zookeeper->getConnectedHostAvailabilityZone()); } }; diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 6e7ea32ee59..f831465277d 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -166,8 +166,14 @@ static ExpressionAndSets buildExpressionAndSets(ASTPtr & ast, const NamesAndType { ExpressionAndSets result; auto ttl_string = queryToString(ast); - auto syntax_analyzer_result = TreeRewriter(context).analyze(ast, columns); - ExpressionAnalyzer analyzer(ast, syntax_analyzer_result, context); + auto context_copy = Context::createCopy(context); + /// FIXME All code here will work with old analyzer, however for TTL + /// with subqueries it's possible that new analyzer will be enabled in ::read method + /// of underlying storage when all other parts of infra are not ready for it + /// (built with old analyzer). 
+ context_copy->setSetting("allow_experimental_analyzer", Field{0}); + auto syntax_analyzer_result = TreeRewriter(context_copy).analyze(ast, columns); + ExpressionAnalyzer analyzer(ast, syntax_analyzer_result, context_copy); auto dag = analyzer.getActionsDAG(false); const auto * col = &dag->findInOutputs(ast->getColumnName()); @@ -177,7 +183,7 @@ static ExpressionAndSets buildExpressionAndSets(ASTPtr & ast, const NamesAndType dag->getOutputs() = {col}; dag->removeUnusedActions(); - result.expression = std::make_shared(dag, ExpressionActionsSettings::fromContext(context)); + result.expression = std::make_shared(dag, ExpressionActionsSettings::fromContext(context_copy)); result.sets = analyzer.getPreparedSets(); return result; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index cec55cefda2..778c9e13adb 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -111,7 +112,7 @@ void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context) NameSet getVirtualNamesForFileLikeStorage() { - return {"_path", "_file", "_size"}; + return {"_path", "_file", "_size", "_time"}; } VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns) @@ -129,6 +130,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription add_virtual("_path", std::make_shared(std::make_shared())); add_virtual("_file", std::make_shared(std::make_shared())); add_virtual("_size", makeNullable(std::make_shared())); + add_virtual("_time", makeNullable(std::make_shared())); return desc; } @@ -187,32 +189,40 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const return block.getByName("_idx").column; } -void addRequestedPathFileAndSizeVirtualsToChunk( - Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, std::optional size, 
const String * filename) +void addRequestedFileLikeStorageVirtualsToChunk( + Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, + VirtualsForFileLikeStorage virtual_values) { for (const auto & virtual_column : requested_virtual_columns) { if (virtual_column.name == "_path") { - chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), path)->convertToFullColumnIfConst()); + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), virtual_values.path)->convertToFullColumnIfConst()); } else if (virtual_column.name == "_file") { - if (filename) + if (virtual_values.filename) { - chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), *filename)->convertToFullColumnIfConst()); + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), (*virtual_values.filename))->convertToFullColumnIfConst()); } else { - size_t last_slash_pos = path.find_last_of('/'); - auto filename_from_path = path.substr(last_slash_pos + 1); + size_t last_slash_pos = virtual_values.path.find_last_of('/'); + auto filename_from_path = virtual_values.path.substr(last_slash_pos + 1); chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), filename_from_path)->convertToFullColumnIfConst()); } } else if (virtual_column.name == "_size") { - if (size) - chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), *size)->convertToFullColumnIfConst()); + if (virtual_values.size) + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), *virtual_values.size)->convertToFullColumnIfConst()); + else + chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); + } + else if (virtual_column.name == "_time") + { + if (virtual_values.last_modified) + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), virtual_values.last_modified->epochTime())->convertToFullColumnIfConst()); else 
chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 62f2e4855b5..fbfbdd6c6cc 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -68,8 +68,18 @@ void filterByPathOrFile(std::vector & sources, const std::vector & pa sources = std::move(filtered_sources); } -void addRequestedPathFileAndSizeVirtualsToChunk( - Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, std::optional size, const String * filename = nullptr); +struct VirtualsForFileLikeStorage +{ + const String & path; + std::optional size { std::nullopt }; + const String * filename { nullptr }; + std::optional last_modified { std::nullopt }; + +}; + +void addRequestedFileLikeStorageVirtualsToChunk( + Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, + VirtualsForFileLikeStorage virtual_values); } } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 8bca1c97aad..77e6ee9cb24 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include @@ -633,7 +633,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) }); builder.addSimpleTransform([&](const Block & current_header) { - return std::make_shared( + return std::make_shared( current_header, getContext()->getSettingsRef().min_insert_block_size_rows, getContext()->getSettingsRef().min_insert_block_size_bytes); @@ -1068,9 +1068,10 @@ void StorageWindowView::threadFuncFireProc() if (max_watermark >= timestamp_now) clean_cache_task->schedule(); + UInt64 next_fire_ms = static_cast(next_fire_signal) * 1000; UInt64 timestamp_ms = static_cast(Poco::Timestamp().epochMicroseconds()) / 1000; if (!shutdown_called) - 
fire_task->scheduleAfter(std::max(UInt64(0), static_cast(next_fire_signal) * 1000 - timestamp_ms)); + fire_task->scheduleAfter(next_fire_ms - std::min(next_fire_ms, timestamp_ms)); } void StorageWindowView::threadFuncFireEvent() @@ -1532,7 +1533,7 @@ void StorageWindowView::writeIntoWindowView( builder = select_block.buildQueryPipeline(); builder.addSimpleTransform([&](const Block & current_header) { - return std::make_shared( + return std::make_shared( current_header, local_context->getSettingsRef().min_insert_block_size_rows, local_context->getSettingsRef().min_insert_block_size_bytes); diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 131712e750a..ed378169381 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -290,7 +290,7 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, size_t min_block_size_rows = mutable_context->getSettingsRef().min_external_table_block_size_rows; size_t min_block_size_bytes = mutable_context->getSettingsRef().min_external_table_block_size_bytes; - auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes); + auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes); builder->resize(1); builder->addTransform(std::move(squashing)); diff --git a/src/Storages/examples/CMakeLists.txt b/src/Storages/examples/CMakeLists.txt index b4786b7313b..4f221efbd2b 100644 --- a/src/Storages/examples/CMakeLists.txt +++ b/src/Storages/examples/CMakeLists.txt @@ -5,4 +5,4 @@ clickhouse_add_executable (merge_selector2 merge_selector2.cpp) target_link_libraries (merge_selector2 PRIVATE dbms) clickhouse_add_executable (get_current_inserts_in_replicated get_current_inserts_in_replicated.cpp) -target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms 
clickhouse_common_config clickhouse_common_zookeeper) +target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper clickhouse_functions) diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 47542b7b47e..628e5a85437 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -34,6 +34,7 @@ void registerStorageFuzzJSON(StorageFactory & factory); void registerStorageS3(StorageFactory & factory); void registerStorageHudi(StorageFactory & factory); void registerStorageS3Queue(StorageFactory & factory); +void registerStorageAzureQueue(StorageFactory & factory); #if USE_PARQUET void registerStorageDeltaLake(StorageFactory & factory); @@ -126,6 +127,10 @@ void registerStorages() registerStorageFuzzJSON(factory); #endif +#if USE_AZURE_BLOB_STORAGE + registerStorageAzureQueue(factory); +#endif + #if USE_AWS_S3 registerStorageHudi(factory); registerStorageS3Queue(factory); diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 7e2d393c3d1..6765e112bb9 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -368,17 +368,21 @@ TEST(TransformQueryForExternalDatabase, Null) check(state, 1, {"field"}, "SELECT field FROM table WHERE field IS NULL", - R"(SELECT "field" FROM "test"."table" WHERE "field" IS NULL)"); + R"(SELECT "field" FROM "test"."table" WHERE "field" IS NULL)", + R"(SELECT "field" FROM "test"."table" WHERE 1 = 0)"); check(state, 1, {"field"}, "SELECT field FROM table WHERE field IS NOT NULL", - R"(SELECT "field" FROM "test"."table" WHERE "field" IS NOT NULL)"); + R"(SELECT "field" FROM "test"."table" WHERE "field" IS NOT NULL)", + R"(SELECT "field" FROM "test"."table")"); check(state, 1, {"field"}, "SELECT field FROM table WHERE 
isNull(field)", - R"(SELECT "field" FROM "test"."table" WHERE "field" IS NULL)"); + R"(SELECT "field" FROM "test"."table" WHERE "field" IS NULL)", + R"(SELECT "field" FROM "test"."table" WHERE 1 = 0)"); check(state, 1, {"field"}, "SELECT field FROM table WHERE isNotNull(field)", - R"(SELECT "field" FROM "test"."table" WHERE "field" IS NOT NULL)"); + R"(SELECT "field" FROM "test"."table" WHERE "field" IS NOT NULL)", + R"(SELECT "field" FROM "test"."table")"); } TEST(TransformQueryForExternalDatabase, ToDate) diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index afc458ea612..fc85bde11d9 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -288,7 +288,8 @@ String transformQueryForExternalDatabaseImpl( LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, - ContextPtr context) + ContextPtr context, + std::optional limit) { bool strict = context->getSettingsRef().external_table_strict_query; @@ -374,6 +375,9 @@ String transformQueryForExternalDatabaseImpl( select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(original_where)); } + if (limit) + select->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::make_shared(*limit)); + ASTPtr select_ptr = select; dropAliases(select_ptr); @@ -399,7 +403,8 @@ String transformQueryForExternalDatabase( LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, - ContextPtr context) + ContextPtr context, + std::optional limit) { if (!query_info.syntax_analyzer_result) { @@ -424,7 +429,8 @@ String transformQueryForExternalDatabase( literal_escaping_style, database, table, - context); + context, + limit); } auto clone_query = query_info.query->clone(); @@ -436,7 +442,8 @@ String transformQueryForExternalDatabase( literal_escaping_style, database, table, - context); + context, + limit); } } diff --git 
a/src/Storages/transformQueryForExternalDatabase.h b/src/Storages/transformQueryForExternalDatabase.h index fb6af21907e..2cd7e3676b5 100644 --- a/src/Storages/transformQueryForExternalDatabase.h +++ b/src/Storages/transformQueryForExternalDatabase.h @@ -21,6 +21,8 @@ class IAST; * and WHERE contains subset of (AND-ed) conditions from original query, * that contain only compatible expressions. * + * If limit is passed additionally apply LIMIT in result query. + * * Compatible expressions are comparisons of identifiers, constants, and logical operations on them. * * Throws INCORRECT_QUERY if external_table_strict_query (from context settings) @@ -34,6 +36,7 @@ String transformQueryForExternalDatabase( LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, - ContextPtr context); + ContextPtr context, + std::optional limit = {}); } diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index 137e1dc27fe..e5676c5c25d 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -36,7 +36,7 @@ StoragePtr ITableFunction::execute(const ASTPtr & ast_function, ContextPtr conte if (cached_columns.empty()) return executeImpl(ast_function, context, table_name, std::move(cached_columns), is_insert_query); - if (hasStaticStructure() && cached_columns == getActualTableStructure(context,is_insert_query)) + if (hasStaticStructure() && cached_columns == getActualTableStructure(context, is_insert_query)) return executeImpl(ast_function, context_to_use, table_name, std::move(cached_columns), is_insert_query); auto this_table_function = shared_from_this(); diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 5f798158a41..bd92465e1aa 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -1,11 +1,4 @@ -00725_memory_tracking 01624_soft_constraints 02354_vector_search_queries -02901_parallel_replicas_rollup 
-02999_scalar_subqueries_bug_2 -# Flaky list -01825_type_json_in_array -01414_mutations_and_errors_zookeeper -01287_max_execution_speed # Check after ConstantNode refactoring -02154_parser_backtracking +02944_variant_as_common_type diff --git a/tests/ci/artifacts_helper.py b/tests/ci/artifacts_helper.py index 37abf0bdefb..503ba2e1ec4 100644 --- a/tests/ci/artifacts_helper.py +++ b/tests/ci/artifacts_helper.py @@ -15,7 +15,7 @@ from github.Commit import Commit from build_download_helper import download_build_with_progress from commit_status_helper import post_commit_status from compress_files import SUFFIX, compress_fast, decompress_fast -from env_helper import CI, RUNNER_TEMP, S3_BUILDS_BUCKET +from env_helper import IS_CI, RUNNER_TEMP, S3_BUILDS_BUCKET from git_helper import SHA_REGEXP from report import FOOTER_HTML_TEMPLATE, HEAD_HTML_TEMPLATE, SUCCESS from s3_helper import S3Helper @@ -131,7 +131,7 @@ class ArtifactsHelper: post_commit_status(commit, SUCCESS, url, "Artifacts for workflow", "Artifacts") def _regenerate_index(self) -> None: - if CI: + if IS_CI: files = self._get_s3_objects() else: files = self._get_local_s3_objects() diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index b88a9476a6d..8bc0f51dfc7 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -6,7 +6,7 @@ import subprocess import sys from pathlib import Path -from build_download_helper import get_build_name_for_check, read_build_urls +from build_download_helper import read_build_urls from clickhouse_helper import CiLogsCredentials from docker_images_helper import DockerImage, get_docker_image, pull_image from env_helper import REPORT_PATH, TEMP_PATH @@ -14,6 +14,7 @@ from pr_info import PRInfo from report import FAIL, FAILURE, OK, SUCCESS, JobReport, TestResult from stopwatch import Stopwatch from tee_popen import TeePopen +from ci_config import CI IMAGE_NAME = "clickhouse/fuzzer" @@ -64,7 +65,7 @@ def main(): docker_image = 
pull_image(get_docker_image(IMAGE_NAME)) - build_name = get_build_name_for_check(check_name) + build_name = CI.get_required_build_name(check_name) urls = read_build_urls(build_name, reports_path) if not urls: raise ValueError("No build URLs found") diff --git a/tests/ci/bugfix_validate_check.py b/tests/ci/bugfix_validate_check.py index d41fdaf05ff..71b18572938 100644 --- a/tests/ci/bugfix_validate_check.py +++ b/tests/ci/bugfix_validate_check.py @@ -7,7 +7,7 @@ import sys from pathlib import Path from typing import List, Sequence, Tuple -from ci_config import JobNames +from ci_config import CI from ci_utils import normalize_string from env_helper import TEMP_PATH from functional_test_check import NO_CHANGES_MSG @@ -92,16 +92,19 @@ def main(): logging.basicConfig(level=logging.INFO) # args = parse_args() stopwatch = Stopwatch() - jobs_to_validate = [JobNames.STATELESS_TEST_RELEASE, JobNames.INTEGRATION_TEST] + jobs_to_validate = [ + CI.JobNames.STATELESS_TEST_RELEASE, + CI.JobNames.INTEGRATION_TEST, + ] functional_job_report_file = Path(TEMP_PATH) / "functional_test_job_report.json" integration_job_report_file = Path(TEMP_PATH) / "integration_test_job_report.json" jobs_report_files = { - JobNames.STATELESS_TEST_RELEASE: functional_job_report_file, - JobNames.INTEGRATION_TEST: integration_job_report_file, + CI.JobNames.STATELESS_TEST_RELEASE: functional_job_report_file, + CI.JobNames.INTEGRATION_TEST: integration_job_report_file, } jobs_scripts = { - JobNames.STATELESS_TEST_RELEASE: "functional_test_check.py", - JobNames.INTEGRATION_TEST: "integration_test_check.py", + CI.JobNames.STATELESS_TEST_RELEASE: "functional_test_check.py", + CI.JobNames.INTEGRATION_TEST: "integration_test_check.py", } for test_job in jobs_to_validate: diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 260b77b0ee5..39f34ed9ccf 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -9,7 +9,7 @@ from pathlib import Path from typing import Tuple import 
docker_images_helper -from ci_config import CI_CONFIG, BuildConfig +from ci_config import CI from env_helper import REPO_COPY, S3_BUILDS_BUCKET, TEMP_PATH from git_helper import Git from lambda_shared_package.lambda_shared.pr import Labels @@ -27,7 +27,7 @@ IMAGE_NAME = "clickhouse/binary-builder" BUILD_LOG_NAME = "build_log.log" -def _can_export_binaries(build_config: BuildConfig) -> bool: +def _can_export_binaries(build_config: CI.BuildConfig) -> bool: if build_config.package_type != "deb": return False if build_config.sanitizer != "": @@ -38,7 +38,7 @@ def _can_export_binaries(build_config: BuildConfig) -> bool: def get_packager_cmd( - build_config: BuildConfig, + build_config: CI.BuildConfig, packager_path: Path, output_path: Path, build_version: str, @@ -147,7 +147,8 @@ def main(): stopwatch = Stopwatch() build_name = args.build_name - build_config = CI_CONFIG.build_config[build_name] + build_config = CI.JOB_CONFIGS[build_name].build_config + assert build_config temp_path = Path(TEMP_PATH) temp_path.mkdir(parents=True, exist_ok=True) diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 0f6c8e5aa8a..8482abb26e0 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -10,11 +10,25 @@ from typing import Any, Callable, List, Optional, Union import requests -import get_robot_token as grt # we need an updated ROBOT_TOKEN -from ci_config import CI_CONFIG +from ci_config import CI + +try: + # A work around for scripts using this downloading module without required deps + import get_robot_token as grt # we need an updated ROBOT_TOKEN +except ImportError: + + class grt: # type: ignore + ROBOT_TOKEN = None + + @staticmethod + def get_best_robot_token() -> str: + return "" + DOWNLOAD_RETRIES_COUNT = 5 +logger = logging.getLogger(__name__) + class DownloadException(Exception): pass @@ -30,7 +44,7 @@ def get_with_retries( sleep: int = 3, **kwargs: Any, ) -> requests.Response: - logging.info( + 
logger.info( "Getting URL with %i tries and sleep %i in between: %s", retries, sleep, url ) exc = Exception("A placeholder to satisfy typing and avoid nesting") @@ -42,7 +56,7 @@ def get_with_retries( return response except Exception as e: if i + 1 < retries: - logging.info("Exception '%s' while getting, retry %i", e, i + 1) + logger.info("Exception '%s' while getting, retry %i", e, i + 1) time.sleep(sleep) exc = e @@ -63,15 +77,10 @@ def get_gh_api( """ def set_auth_header(): - if "headers" in kwargs: - if "Authorization" not in kwargs["headers"]: - kwargs["headers"][ - "Authorization" - ] = f"Bearer {grt.get_best_robot_token()}" - else: - kwargs["headers"] = { - "Authorization": f"Bearer {grt.get_best_robot_token()}" - } + headers = kwargs.get("headers", {}) + if "Authorization" not in headers: + headers["Authorization"] = f"Bearer {grt.get_best_robot_token()}" + kwargs["headers"] = headers if grt.ROBOT_TOKEN is not None: set_auth_header() @@ -96,7 +105,7 @@ def get_gh_api( ) try_auth = e.response.status_code == 404 if (ratelimit_exceeded or try_auth) and not token_is_set: - logging.warning( + logger.warning( "Received rate limit exception, setting the auth header and retry" ) set_auth_header() @@ -107,39 +116,35 @@ def get_gh_api( exc = e if try_cnt < retries: - logging.info("Exception '%s' while getting, retry %i", exc, try_cnt) + logger.info("Exception '%s' while getting, retry %i", exc, try_cnt) time.sleep(sleep) - raise APIException("Unable to request data from GH API") from exc - - -def get_build_name_for_check(check_name: str) -> str: - return CI_CONFIG.test_configs[check_name].required_build + raise APIException(f"Unable to request data from GH API: {url}") from exc def read_build_urls(build_name: str, reports_path: Union[Path, str]) -> List[str]: for root, _, files in os.walk(reports_path): for file in files: if file.endswith(f"_{build_name}.json"): - logging.info("Found build report json %s for %s", file, build_name) + logger.info("Found build report 
json %s for %s", file, build_name) with open( os.path.join(root, file), "r", encoding="utf-8" ) as file_handler: build_report = json.load(file_handler) return build_report["build_urls"] # type: ignore - logging.info("A build report is not found for %s", build_name) + logger.info("A build report is not found for %s", build_name) return [] def download_build_with_progress(url: str, path: Path) -> None: - logging.info("Downloading from %s to temp path %s", url, path) + logger.info("Downloading from %s to temp path %s", url, path) for i in range(DOWNLOAD_RETRIES_COUNT): try: response = get_with_retries(url, retries=1, stream=True) total_length = int(response.headers.get("content-length", 0)) if path.is_file() and total_length and path.stat().st_size == total_length: - logging.info( + logger.info( "The file %s already exists and have a proper size %s", path, total_length, @@ -148,14 +153,14 @@ def download_build_with_progress(url: str, path: Path) -> None: with open(path, "wb") as f: if total_length == 0: - logging.info( + logger.info( "No content-length, will download file without progress" ) f.write(response.content) else: dl = 0 - logging.info("Content length is %ld bytes", total_length) + logger.info("Content length is %ld bytes", total_length) for data in response.iter_content(chunk_size=4096): dl += len(data) f.write(data) @@ -170,8 +175,8 @@ def download_build_with_progress(url: str, path: Path) -> None: except Exception as e: if sys.stdout.isatty(): sys.stdout.write("\n") - if os.path.exists(path): - os.remove(path) + if path.exists(): + path.unlink() if i + 1 < DOWNLOAD_RETRIES_COUNT: time.sleep(3) @@ -182,7 +187,7 @@ def download_build_with_progress(url: str, path: Path) -> None: if sys.stdout.isatty(): sys.stdout.write("\n") - logging.info("Downloading finished") + logger.info("Downloading finished") def download_builds( @@ -191,7 +196,7 @@ def download_builds( for url in build_urls: if filter_fn(url): fname = os.path.basename(url.replace("%2B", 
"+").replace("%20", " ")) - logging.info("Will download %s to %s", fname, result_path) + logger.info("Will download %s to %s", fname, result_path) download_build_with_progress(url, result_path / fname) @@ -201,9 +206,9 @@ def download_builds_filter( result_path: Path, filter_fn: Callable[[str], bool] = lambda _: True, ) -> None: - build_name = get_build_name_for_check(check_name) + build_name = CI.get_required_build_name(check_name) urls = read_build_urls(build_name, reports_path) - logging.info("The build report for %s contains the next URLs: %s", build_name, urls) + logger.info("The build report for %s contains the next URLs: %s", build_name, urls) if not urls: raise DownloadException("No build URLs found") @@ -238,9 +243,9 @@ def download_clickhouse_binary( def get_clickhouse_binary_url( check_name: str, reports_path: Union[Path, str] ) -> Optional[str]: - build_name = get_build_name_for_check(check_name) + build_name = CI.get_required_build_name(check_name) urls = read_build_urls(build_name, reports_path) - logging.info("The build report for %s contains the next URLs: %s", build_name, urls) + logger.info("The build report for %s contains the next URLs: %s", build_name, urls) for url in urls: check_url = url if "?" 
in check_url: diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 1d734fbb3f8..04c8d12fc30 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import argparse import json import logging import os @@ -6,7 +7,6 @@ import sys from pathlib import Path from typing import List -from ci_config import CI_CONFIG, Build from env_helper import ( GITHUB_JOB_URL, GITHUB_REPOSITORY, @@ -14,7 +14,7 @@ from env_helper import ( REPORT_PATH, TEMP_PATH, CI_CONFIG_PATH, - CI, + IS_CI, ) from pr_info import PRInfo from report import ( @@ -25,8 +25,10 @@ from report import ( JobReport, create_build_html_report, get_worst_status, + FAILURE, ) from stopwatch import Stopwatch +from ci_config import CI # Old way to read the neads_data NEEDS_DATA_PATH = os.getenv("NEEDS_DATA_PATH", "") @@ -46,16 +48,13 @@ def main(): "\n ".join(p.as_posix() for p in reports_path.rglob("*.json")), ) - build_check_name = sys.argv[1] + build_check_name = CI.JobNames.BUILD_CHECK pr_info = PRInfo() - builds_for_check = CI_CONFIG.get_builds_for_report( - build_check_name, - release=pr_info.is_release, - backport=pr_info.head_ref.startswith("backport/"), - ) - if CI: + args = parse_args() + + if (CI_CONFIG_PATH or IS_CI) and not args.reports: # In CI only specific builds might be manually selected, or some wf does not build all builds. 
# Filtering @builds_for_check to verify only builds that are present in the current CI workflow with open(CI_CONFIG_PATH, encoding="utf-8") as jfd: @@ -64,8 +63,15 @@ def main(): ci_config["jobs_data"]["jobs_to_skip"] + ci_config["jobs_data"]["jobs_to_do"] ) - builds_for_check = [job for job in builds_for_check if job in all_ci_jobs] - print(f"NOTE: following build reports will be accounted: [{builds_for_check}]") + builds_for_check = [job for job in CI.BuildNames if job in all_ci_jobs] + print("NOTE: builds for check taken from ci configuration") + else: + builds_for_check = parse_args().reports + for job in builds_for_check: + assert job in CI.BuildNames, "Builds must be known build job names" + print("NOTE: builds for check taken from input arguments") + + print(f"NOTE: following build reports will be checked: [{builds_for_check}]") required_builds = len(builds_for_check) missing_builds = 0 @@ -77,8 +83,8 @@ def main(): build_name, pr_info.number, pr_info.head_ref ) if not build_result: - if build_name == Build.FUZZERS: - logging.info("Build [%s] is missing - skip", Build.FUZZERS) + if build_name == CI.BuildNames.FUZZERS: + logging.info("Build [%s] is missing - skip", CI.BuildNames.FUZZERS) continue logging.warning("Build results for %s is missing", build_name) build_result = BuildResult.missing_result("missing") @@ -132,17 +138,16 @@ def main(): # Check if there are no builds at all, do not override bad status if summary_status == SUCCESS: if missing_builds: - summary_status = PENDING + summary_status = FAILURE elif ok_groups == 0: summary_status = ERROR - addition = "" - if missing_builds: - addition = ( - f" ({required_builds - missing_builds} of {required_builds} builds are OK)" - ) + description = "" - description = f"{ok_groups}/{total_groups} artifact groups are OK{addition}" + if missing_builds: + description = f"{missing_builds} of {required_builds} builds are missing." 
+ + description += f" {ok_groups}/{total_groups} artifact groups are OK" JobReport( description=description, @@ -158,5 +163,16 @@ def main(): sys.exit(1) +def parse_args(): + parser = argparse.ArgumentParser("Generates overall build report") + + parser.add_argument( + "--reports", + nargs="+", + help="List of build reports to check", + ) + return parser.parse_args() + + if __name__ == "__main__": main() diff --git a/tests/ci/changelog.py b/tests/ci/changelog.py new file mode 100755 index 00000000000..3ba618f3ae5 --- /dev/null +++ b/tests/ci/changelog.py @@ -0,0 +1,461 @@ +#!/usr/bin/env python3 +# In our CI this script runs in style-test containers + +import argparse +import logging +import re +from datetime import date, timedelta +from pathlib import Path +from subprocess import DEVNULL +from typing import Any, Dict, List, Optional, TextIO + +import tqdm # type: ignore +from github.GithubException import RateLimitExceededException, UnknownObjectException +from github.NamedUser import NamedUser +from thefuzz.fuzz import ratio # type: ignore + +from cache_utils import GitHubCache +from env_helper import TEMP_PATH +from git_helper import git_runner, is_shallow +from github_helper import GitHub, PullRequest, PullRequests, Repository +from s3_helper import S3Helper +from version_helper import ( + FILE_WITH_VERSION_PATH, + get_abs_path, + get_version_from_repo, + get_version_from_tag, +) + +# This array gives the preferred category order, and is also used to +# normalize category names. 
+# Categories are used in .github/PULL_REQUEST_TEMPLATE.md, keep comments there +# updated accordingly +categories_preferred_order = ( + "Backward Incompatible Change", + "New Feature", + "Experimental Feature", + "Performance Improvement", + "Improvement", + "Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)", + "Bug Fix (user-visible misbehavior in an official stable release)", + "Build/Testing/Packaging Improvement", + "Other", +) + +FROM_REF = "" +TO_REF = "" +SHA_IN_CHANGELOG = [] # type: List[str] +gh = GitHub(create_cache_dir=False) +runner = git_runner + + +class Description: + def __init__( + self, number: int, user: NamedUser, html_url: str, entry: str, category: str + ): + self.number = number + self.html_url = html_url + self.user = gh.get_user_cached(user._rawData["login"]) # type: ignore + self.entry = entry + self.category = category + + @property + def formatted_entry(self) -> str: + # Substitute issue links. + # 1) issue number w/o markdown link + entry = re.sub( + r"([^[])#([0-9]{4,})", + r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", + self.entry, + ) + # 2) issue URL w/o markdown link + # including #issuecomment-1 or #event-12 + entry = re.sub( + r"([^(])(https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})[-#a-z0-9]*)", + r"\1[#\3](\2)", + entry, + ) + # It's possible that we face a secondary rate limit. + # In this case we should sleep until we get it + while True: + try: + user_name = self.user.name if self.user.name else self.user.login + break + except UnknownObjectException: + user_name = self.user.login + break + except RateLimitExceededException: + gh.sleep_on_rate_limit() + return ( + f"* {entry} [#{self.number}]({self.html_url}) " + f"([{user_name}]({self.user.html_url}))." 
+ ) + + # Sort PR descriptions by numbers + def __eq__(self, other: Any) -> bool: + if not isinstance(self, type(other)): + raise NotImplementedError + return bool(self.number == other.number) + + def __lt__(self, other: "Description") -> bool: + return self.number < other.number + + +def get_descriptions(prs: PullRequests) -> Dict[str, List[Description]]: + descriptions = {} # type: Dict[str, List[Description]] + repos = {} # type: Dict[str, Repository] + for pr in prs: + # See https://github.com/PyGithub/PyGithub/issues/2202, + # obj._rawData doesn't spend additional API requests + # We'll save some requests + # pylint: disable=protected-access + repo_name = pr._rawData["base"]["repo"]["full_name"] + # pylint: enable=protected-access + if repo_name not in repos: + repos[repo_name] = pr.base.repo + in_changelog = False + merge_commit = pr.merge_commit_sha + if merge_commit is None: + logging.warning("PR %s does not have merge-commit, skipping", pr.number) + continue + + in_changelog = merge_commit in SHA_IN_CHANGELOG + if in_changelog: + desc = generate_description(pr, repos[repo_name]) + if desc: + if desc.category not in descriptions: + descriptions[desc.category] = [] + descriptions[desc.category].append(desc) + + for descs in descriptions.values(): + descs.sort() + + return descriptions + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Generate a changelog in Markdown format between given tags. 
" + "It fetches all tags and unshallow the git repository automatically", + ) + parser.add_argument( + "-v", + "--verbose", + action="count", + default=0, + help="set the script verbosity, could be used multiple", + ) + parser.add_argument( + "--debug-helpers", + action="store_true", + help="add debug logging for git_helper and github_helper", + ) + parser.add_argument( + "--output", + type=argparse.FileType("w"), + default="-", + help="output file for changelog", + ) + parser.add_argument( + "--repo", + default="ClickHouse/ClickHouse", + help="a repository to query for pull-requests from GitHub", + ) + parser.add_argument( + "--jobs", + type=int, + default=10, + help="number of jobs to get pull-requests info from GitHub API", + ) + parser.add_argument( + "--gh-user-or-token", + help="user name or GH token to authenticate", + ) + parser.add_argument( + "--gh-password", + help="a password that should be used when user is given", + ) + parser.add_argument( + "--with-testing-tags", + action="store_true", + help="by default '*-testing' tags are ignored, this argument enables them too", + ) + parser.add_argument( + "--from", + dest="from_ref", + help="git ref for a starting point of changelog, by default is calculated " + "automatically to match a previous tag in history", + ) + parser.add_argument( + "to_ref", + metavar="TO_REF", + help="git ref for the changelog end", + ) + args = parser.parse_args() + return args + + +# This function mirrors the PR description checks in ClickhousePullRequestTrigger. +# Returns None if the PR should not be mentioned in changelog. 
+def generate_description(item: PullRequest, repo: Repository) -> Optional[Description]: + backport_number = item.number + if item.head.ref.startswith("backport/"): + branch_parts = item.head.ref.split("/") + if len(branch_parts) == 3: + try: + item = gh.get_pull_cached(repo, int(branch_parts[-1])) + except Exception as e: + logging.warning("unable to get backported PR, exception: %s", e) + else: + logging.warning( + "The branch %s doesn't match backport template, using PR %s as is", + item.head.ref, + item.number, + ) + description = item.body + # Don't skip empty lines because they delimit parts of description + lines = [x.strip() for x in (description.split("\n") if description else [])] + lines = [re.sub(r"\s+", " ", ln) for ln in lines] + + category = "" + entry = "" + + if lines: + i = 0 + while i < len(lines): + if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]): + i += 1 + if i >= len(lines): + break + # Can have one empty line between header and the category itself. + # Filter it out. + if not lines[i]: + i += 1 + if i >= len(lines): + break + category = re.sub(r"^[-*\s]*", "", lines[i]) + i += 1 + elif re.match( + r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] + ): + i += 1 + # Can have one empty line between header and the entry itself. + # Filter it out. + if i < len(lines) and not lines[i]: + i += 1 + # All following lines until empty one are the changelog entry. + entry_lines = [] + while i < len(lines) and lines[i]: + entry_lines.append(lines[i]) + i += 1 + entry = " ".join(entry_lines) + else: + i += 1 + + # Remove excessive bullets from the entry. + if re.match(r"^[\-\*] ", entry): + entry = entry[2:] + + # Better style. + if re.match(r"^[a-z]", entry): + entry = entry.capitalize() + + if not category: + # Shouldn't happen, because description check in CI should catch such PRs. + # Fall through, so that it shows up in output and the user can fix it. 
+ category = "NO CL CATEGORY" + + # Filter out documentations changelog before not-for-changelog + if re.match( + r"(?i)doc", + category, + ): + return None + + # Filter out the PR categories that are not for changelog. + if re.search( + r"(?i)((non|in|not|un)[-\s]*significant)|" + r"(not[ ]*for[ ]*changelog)|" + r"(changelog[ ]*entry[ ]*is[ ]*not[ ]*required)", + category, + ): + category = "NOT FOR CHANGELOG / INSIGNIFICANT" + # Sometimes we declare not for changelog but still write a description. Keep it + if len(entry) <= 4 or "Documentation entry" in entry: + entry = item.title + + # Normalize bug fixes + if ( + re.match( + r"(?i)bug\Wfix", + category, + ) + and "Critical Bug Fix" not in category + ): + category = "Bug Fix (user-visible misbehavior in an official stable release)" + + if backport_number != item.number: + entry = f"Backported in #{backport_number}: {entry}" + + if not entry: + # Shouldn't happen, because description check in CI should catch such PRs. + category = "NO CL ENTRY" + entry = "NO CL ENTRY: '" + item.title + "'" + + entry = entry.strip() + if entry[-1] != ".": + entry += "." 
+ + for c in categories_preferred_order: + if ratio(category.lower(), c.lower()) >= 90: + category = c + break + + return Description(item.number, item.user, item.html_url, entry, category) + + +def write_changelog( + fd: TextIO, descriptions: Dict[str, List[Description]], year: int +) -> None: + to_commit = runner(f"git rev-parse {TO_REF}^{{}}")[:11] + from_commit = runner(f"git rev-parse {FROM_REF}^{{}}")[:11] + fd.write( + f"---\nsidebar_position: 1\nsidebar_label: {year}\n---\n\n" + f"# {year} Changelog\n\n" + f"### ClickHouse release {TO_REF} ({to_commit}) FIXME " + f"as compared to {FROM_REF} ({from_commit})\n\n" + ) + + seen_categories = [] # type: List[str] + for category in categories_preferred_order: + if category in descriptions: + seen_categories.append(category) + fd.write(f"#### {category}\n") + for desc in descriptions[category]: + fd.write(f"{desc.formatted_entry}\n") + + fd.write("\n") + + for category in sorted(descriptions): + if category not in seen_categories: + fd.write(f"#### {category}\n\n") + for desc in descriptions[category]: + fd.write(f"{desc.formatted_entry}\n") + + fd.write("\n") + + +def check_refs(from_ref: Optional[str], to_ref: str, with_testing_tags: bool) -> None: + global FROM_REF, TO_REF + TO_REF = to_ref + + # Check TO_REF + runner.run(f"git rev-parse {TO_REF}") + + # Check from_ref + if from_ref is not None: + runner.run(f"git rev-parse {FROM_REF}") + FROM_REF = from_ref + return + + # Get the cmake/autogenerated_versions.txt from FROM_REF to read the version + # If the previous tag is greater than version in the FROM_REF, + # then we need to add it to tags_to_exclude + temp_cmake = "tests/ci/tmp/autogenerated_versions.txt" + cmake_version = get_abs_path(temp_cmake) + cmake_version.write_text(runner(f"git show {TO_REF}:{FILE_WITH_VERSION_PATH}")) + to_ref_version = get_version_from_repo(cmake_version) + # Get all tags pointing to TO_REF + excluded_tags = runner.run(f"git tag --points-at '{TO_REF}^{{}}'").split("\n") + 
logging.info("All tags pointing to %s:\n%s", TO_REF, excluded_tags) + if not with_testing_tags: + excluded_tags.append("*-testing") + while not from_ref: + exclude = " ".join([f"--exclude='{tag}'" for tag in excluded_tags]) + from_ref_tag = runner(f"git describe --abbrev=0 --tags {exclude} '{TO_REF}'") + from_ref_version = get_version_from_tag(from_ref_tag) + if from_ref_version <= to_ref_version: + from_ref = from_ref_tag + break + excluded_tags.append(from_ref_tag) + + cmake_version.unlink() + FROM_REF = from_ref + + +def set_sha_in_changelog(): + global SHA_IN_CHANGELOG + SHA_IN_CHANGELOG = runner.run( + f"git log --format=format:%H {FROM_REF}..{TO_REF}" + ).split("\n") + + +def get_year(prs: PullRequests) -> int: + if not prs: + return date.today().year + return max(pr.created_at.year for pr in prs) + + +def main(): + log_levels = [logging.WARN, logging.INFO, logging.DEBUG] + args = parse_args() + logging.basicConfig( + format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d]:\n%(message)s", + level=log_levels[min(args.verbose, 2)], + ) + if args.debug_helpers: + logging.getLogger("github_helper").setLevel(logging.DEBUG) + logging.getLogger("git_helper").setLevel(logging.DEBUG) + + # Get the full repo + if is_shallow(): + logging.info("Unshallow repository") + runner.run("git fetch --unshallow", stderr=DEVNULL) + logging.info("Fetching all tags") + runner.run("git fetch --tags", stderr=DEVNULL) + + check_refs(args.from_ref, args.to_ref, args.with_testing_tags) + set_sha_in_changelog() + + logging.info("Using %s..%s as changelog interval", FROM_REF, TO_REF) + + # use merge-base commit as a starting point, if used ref in another branch + base_commit = runner.run(f"git merge-base '{FROM_REF}^{{}}' '{TO_REF}^{{}}'") + # Get starting and ending dates for gathering PRs + # Add one day after and before to mitigate TZ possible issues + # `tag^{}` format gives commit ref when we have annotated tags + # format %cs gives a committer date, works better for 
cherry-picked commits + from_date = runner.run(f"git log -1 --format=format:%cs '{base_commit}'") + to_date = runner.run(f"git log -1 --format=format:%cs '{TO_REF}^{{}}'") + merged = ( + date.fromisoformat(from_date) - timedelta(1), + date.fromisoformat(to_date) + timedelta(1), + ) + + # Get all PRs for the given time frame + global gh + gh = GitHub( + args.gh_user_or_token, + args.gh_password, + create_cache_dir=False, + per_page=100, + pool_size=args.jobs, + ) + temp_path = Path(TEMP_PATH) + gh_cache = GitHubCache(gh.cache_path, temp_path, S3Helper()) + gh_cache.download() + query = f"type:pr repo:{args.repo} is:merged" + prs = gh.get_pulls_from_search( + query=query, merged=merged, sort="created", progress_func=tqdm.tqdm + ) + + descriptions = get_descriptions(prs) + changelog_year = get_year(prs) + + write_changelog(args.output, descriptions, changelog_year) + gh_cache.upload() + + +if __name__ == "__main__": + main() diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index e470621e2c5..459be12ada0 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -127,13 +127,11 @@ close it. to_pop.append(i) elif pr.head.ref.startswith(f"backport/{self.name}"): self.backport_pr = pr + self._backported = True to_pop.append(i) else: assert False, f"BUG! Invalid PR's branch [{pr.head.ref}]" - # Cherry-pick or backport PR found, set @backported flag for current release branch - self._backported = True - for i in reversed(to_pop): # Going from the tail to keep the order and pop greater index first prs.pop(i) @@ -218,6 +216,7 @@ close it. self.name, self.pr.number, ) + self._backported = True return except CalledProcessError: # There are most probably conflicts, they'll be resolved in PR @@ -247,7 +246,6 @@ close it. 
self.cherrypick_pr.add_to_labels(Labels.PR_CRITICAL_BUGFIX) elif Labels.PR_BUGFIX in [label.name for label in self.pr.labels]: self.cherrypick_pr.add_to_labels(Labels.PR_BUGFIX) - self._backported = True self._assign_new_pr(self.cherrypick_pr) # update cherrypick PR to get the state for PR.mergable self.cherrypick_pr.update() @@ -359,10 +357,10 @@ class Backport: self._fetch_from = fetch_from self.dry_run = dry_run - self.must_create_backport_label = ( - Labels.MUST_BACKPORT + self.must_create_backport_labels = ( + [Labels.MUST_BACKPORT] if self._repo_name == self._fetch_from - else Labels.MUST_BACKPORT_CLOUD + else [Labels.MUST_BACKPORT_CLOUD, Labels.MUST_BACKPORT] ) self.backport_created_label = ( Labels.PR_BACKPORTS_CREATED @@ -468,7 +466,7 @@ class Backport: query_args = { "query": f"type:pr repo:{self._fetch_from} -label:{self.backport_created_label}", "label": ",".join( - self.labels_to_backport + [self.must_create_backport_label] + self.labels_to_backport + self.must_create_backport_labels ), "merged": [since_date, tomorrow], } @@ -492,7 +490,10 @@ class Backport: def process_pr(self, pr: PullRequest) -> None: pr_labels = [label.name for label in pr.labels] - if self.must_create_backport_label in pr_labels: + if ( + any(label in pr_labels for label in self.must_create_backport_labels) + or self._repo_name != self._fetch_from + ): branches = [ ReleaseBranch(br, pr, self.repo, self.backport_created_label) for br in self.release_branches @@ -531,9 +532,9 @@ class Backport: for br in branches: br.process(self.dry_run) - for br in branches: - assert br.backported, f"BUG! 
backport to branch [{br}] failed" - self.mark_pr_backported(pr) + if all(br.backported for br in branches): + # And check it after the running + self.mark_pr_backported(pr) def mark_pr_backported(self, pr: PullRequest) -> None: if self.dry_run: diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 55a18a2f335..af2f4c0a1fc 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -13,14 +13,7 @@ from typing import Any, Dict, List, Optional import docker_images_helper import upload_result_helper from build_check import get_release_or_pr -from ci_config import ( - CI_CONFIG, - Build, - CILabels, - CIStages, - JobNames, - StatusNames, -) +from ci_config import CI from ci_metadata import CiMetadata from ci_utils import GHActions, normalize_string from clickhouse_helper import ( @@ -38,10 +31,11 @@ from commit_status_helper import ( get_commit, post_commit_status, set_status_comment, + get_commit_filtered_statuses, ) from digest_helper import DockerDigester from env_helper import ( - CI, + IS_CI, GITHUB_JOB_API_URL, GITHUB_REPOSITORY, GITHUB_RUN_ID, @@ -295,7 +289,7 @@ def _mark_success_action( batch: int, ) -> None: ci_cache = CiCache(s3, indata["jobs_data"]["digests"]) - job_config = CI_CONFIG.get_job_config(job) + job_config = CI.get_job_config(job) num_batches = job_config.num_batches # if batch is not provided - set to 0 batch = 0 if batch == -1 else batch @@ -305,7 +299,7 @@ def _mark_success_action( # FIXME: find generic design for propagating and handling job status (e.g. 
stop using statuses in GH api) # now job ca be build job w/o status data, any other job that exit with 0 with or w/o status data - if CI_CONFIG.is_build_job(job): + if CI.is_build_job(job): # there is no CommitStatus for build jobs # create dummy status relying on JobReport # FIXME: consider creating commit status for build jobs too, to treat everything the same way @@ -424,6 +418,8 @@ def _configure_jobs( s3: S3Helper, pr_info: PRInfo, ci_settings: CiSettings, + skip_jobs: bool, + dry_run: bool = False, ) -> CiCache: """ returns CICache instance with configured job's data @@ -434,20 +430,31 @@ def _configure_jobs( """ # get all jobs - job_configs = CI_CONFIG.get_workflow_jobs_with_configs( - is_mq=pr_info.is_merge_queue, - is_docs_only=pr_info.has_changes_in_documentation_only(), - is_master=pr_info.is_master, - ) + if not skip_jobs: + job_configs = CI.get_workflow_jobs_with_configs( + is_mq=pr_info.is_merge_queue, + is_docs_only=pr_info.has_changes_in_documentation_only(), + is_master=pr_info.is_master, + is_pr=pr_info.is_pr, + ) + else: + job_configs = {} # filter jobs in accordance with ci settings job_configs = ci_settings.apply( - job_configs, pr_info.is_release, is_pr=pr_info.is_pr, labels=pr_info.labels + job_configs, + pr_info.is_release, + is_pr=pr_info.is_pr, + is_mq=pr_info.is_merge_queue, + labels=pr_info.labels, ) # check jobs in ci cache ci_cache = CiCache.calc_digests_and_create( - s3, job_configs, cache_enabled=not ci_settings.no_ci_cache and CI + s3, + job_configs, + cache_enabled=not ci_settings.no_ci_cache and not skip_jobs and IS_CI, + dry_run=dry_run, ) ci_cache.update() ci_cache.apply(job_configs, is_release=pr_info.is_release) @@ -455,7 +462,9 @@ def _configure_jobs( return ci_cache -def _generate_ci_stage_config(jobs_data: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: +def _generate_ci_stage_config( + jobs_data: Dict[str, Any], non_blocking_mode: bool = False +) -> Dict[str, Dict[str, Any]]: """ populates GH Actions' workflow with real jobs 
"Builds_1": [{"job_name": NAME, "runner_type": RUNNER_TYPE}] @@ -465,14 +474,14 @@ def _generate_ci_stage_config(jobs_data: Dict[str, Any]) -> Dict[str, Dict[str, result = {} # type: Dict[str, Any] stages_to_do = [] for job in jobs_data: - stage_type = CI_CONFIG.get_job_ci_stage(job) - if stage_type == CIStages.NA: + stage_type = CI.get_job_ci_stage(job, non_blocking_ci=non_blocking_mode) + if stage_type == CI.WorkflowStages.NA: continue if stage_type not in result: result[stage_type] = [] stages_to_do.append(stage_type) result[stage_type].append( - {"job_name": job, "runner_type": CI_CONFIG.get_runner_type(job)} + {"job_name": job, "runner_type": CI.JOB_CONFIGS[job].runner_type} ) result["stages_to_do"] = stages_to_do return result @@ -519,10 +528,10 @@ def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None: if job not in jobs_to_skip and job not in jobs_to_do: # no need to create status for job that are not supposed to be executed continue - if CI_CONFIG.is_build_job(job): + if CI.is_build_job(job): # no GH status for build jobs continue - job_config = CI_CONFIG.get_job_config(job) + job_config = CI.get_job_config(job) if not job_config: # there might be a new job that does not exist on this branch - skip it continue @@ -548,7 +557,7 @@ def _fetch_commit_tokens(message: str, pr_info: PRInfo) -> List[str]: res = [ match for match in matches - if match in CILabels or match.startswith("job_") or match.startswith("batch_") + if match in CI.Tags or match.startswith("job_") or match.startswith("batch_") ] print(f"CI modifiers from commit message: [{res}]") res_2 = [] @@ -557,7 +566,7 @@ def _fetch_commit_tokens(message: str, pr_info: PRInfo) -> List[str]: res_2 = [ match for match in matches - if match in CILabels + if match in CI.Tags or match.startswith("job_") or match.startswith("batch_") ] @@ -633,7 +642,7 @@ def _upload_build_artifacts( print(f"Report file has been uploaded to [{report_url}]") # Upload master head's binaries - static_bin_name = 
CI_CONFIG.build_config[build_name].static_binary_name + static_bin_name = CI.get_build_config(build_name).static_binary_name if pr_info.is_master and static_bin_name: # Full binary with debug info: s3_path_full = "/".join((pr_info.base_ref, static_bin_name, "clickhouse-full")) @@ -828,15 +837,15 @@ def _add_build_to_version_history( def _run_test(job_name: str, run_command: str) -> int: assert ( - run_command or CI_CONFIG.get_job_config(job_name).run_command + run_command or CI.get_job_config(job_name).run_command ), "Run command must be provided as input argument or be configured in job config" env = os.environ.copy() - timeout = CI_CONFIG.get_job_config(job_name).timeout or None + timeout = CI.get_job_config(job_name).timeout or None if not run_command: run_command = "/".join( - (os.path.dirname(__file__), CI_CONFIG.get_job_config(job_name).run_command) + (os.path.dirname(__file__), CI.get_job_config(job_name).run_command) ) if ".py" in run_command and not run_command.startswith("python"): run_command = "python3 " + run_command @@ -903,13 +912,23 @@ def _cancel_pr_wf(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> No def _set_pending_statuses(pr_info: PRInfo) -> None: commit = get_commit(GitHub(get_best_robot_token(), per_page=100), pr_info.sha) try: - print("Set SYNC status to pending") - commit.create_status( - state=PENDING, - target_url="", - description="", - context=StatusNames.SYNC, - ) + found = False + statuses = get_commit_filtered_statuses(commit) + for commit_status in statuses: + if commit_status.context == CI.StatusNames.SYNC: + print( + f"Sync status found [{commit_status.state}], [{commit_status.description}] - won't be overwritten" + ) + found = True + break + if not found: + print("Set Sync status to pending") + commit.create_status( + state=PENDING, + target_url="", + description=CI.SyncState.PENDING, + context=CI.StatusNames.SYNC, + ) except Exception as ex: print(f"ERROR: failed to set GH commit status, ex: {ex}") @@ -942,7 +961,7 
@@ def main() -> int: ### CONFIGURE action: start if args.configure: - if CI and pr_info.is_pr: + if IS_CI and pr_info.is_pr: # store meta on s3 (now we need it only for PRs) meta = CiMetadata(s3, pr_info.number, pr_info.head_ref) meta.run_id = int(GITHUB_RUN_ID) @@ -952,7 +971,7 @@ def main() -> int: args.commit_message or None, update_from_api=True ) - if ci_settings.no_merge_commit and CI: + if ci_settings.no_merge_commit and IS_CI: git_runner.run(f"{GIT_PREFIX} checkout {pr_info.sha}") git_ref = git_runner.run(f"{GIT_PREFIX} rev-parse HEAD") @@ -971,33 +990,37 @@ def main() -> int: s3, pr_info, ci_settings, + args.skip_jobs, ) ci_cache.print_status() - if CI and not pr_info.is_merge_queue: + if IS_CI and not pr_info.is_merge_queue: # wait for pending jobs to be finished, await_jobs is a long blocking call ci_cache.await_pending_jobs(pr_info.is_release) if pr_info.is_release: + print("Release/master: CI Cache add pending records for all todo jobs") ci_cache.push_pending_all(pr_info.is_release) # conclude results result["git_ref"] = git_ref result["version"] = version - result["build"] = ci_cache.job_digests[Build.PACKAGE_RELEASE] - result["docs"] = ci_cache.job_digests[JobNames.DOCS_CHECK] + result["build"] = ci_cache.job_digests[CI.BuildNames.PACKAGE_RELEASE] + result["docs"] = ci_cache.job_digests[CI.JobNames.DOCS_CHECK] result["ci_settings"] = ci_settings.as_dict() if not args.skip_jobs: - result["stages_data"] = _generate_ci_stage_config(ci_cache.jobs_to_do) - result["jobs_data"] = { - "jobs_to_do": list(ci_cache.jobs_to_do), - "jobs_to_skip": ci_cache.jobs_to_skip, - "digests": ci_cache.job_digests, - "jobs_params": { - job: {"batches": config.batches, "num_batches": config.num_batches} - for job, config in ci_cache.jobs_to_do.items() - }, - } + result["stages_data"] = _generate_ci_stage_config( + ci_cache.jobs_to_do, ci_settings.woolen_wolfdog + ) + result["jobs_data"] = { + "jobs_to_do": list(ci_cache.jobs_to_do), + "jobs_to_skip": ci_cache.jobs_to_skip, 
+ "digests": ci_cache.job_digests, + "jobs_params": { + job: {"batches": config.batches, "num_batches": config.num_batches} + for job, config in ci_cache.jobs_to_do.items() + }, + } result["docker_data"] = docker_data ### CONFIGURE action: end @@ -1016,7 +1039,7 @@ def main() -> int: f"Check if rerun for name: [{check_name}], extended name [{check_name_with_group}]" ) previous_status = None - if CI_CONFIG.is_build_job(check_name): + if CI.is_build_job(check_name): # this is a build job - check if a build report is present build_result = ( BuildResult.load_any(check_name, pr_info.number, pr_info.head_ref) @@ -1042,25 +1065,25 @@ def main() -> int: ) # rerun helper check - # FIXME: remove rerun_helper check and rely on ci cache only + # FIXME: Find a way to identify if job restarted manually (by developer) or by automatic workflow restart (died spot-instance) + # disable rerun check for the former if check_name not in ( - # we might want to rerun reports' jobs - disable rerun check for them - JobNames.BUILD_CHECK, - JobNames.BUILD_CHECK_SPECIAL, - ): + CI.JobNames.BUILD_CHECK, + ): # we might want to rerun build report job rerun_helper = RerunHelper(commit, check_name_with_group) if rerun_helper.is_already_finished_by_status(): + print("WARNING: Rerunning job with GH status ") status = rerun_helper.get_finished_status() assert status - previous_status = status.state print("::group::Commit Status") print(status) print("::endgroup::") + previous_status = status.state # ci cache check if not previous_status and not ci_settings.no_ci_cache: ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update() - job_config = CI_CONFIG.get_job_config(check_name) + job_config = CI.get_job_config(check_name) if ci_cache.is_successful( check_name, args.batch, @@ -1100,7 +1123,7 @@ def main() -> int: ch_helper = ClickHouseHelper() check_url = "" - if CI_CONFIG.is_build_job(args.job_name): + if CI.is_build_job(args.job_name): assert ( indata ), f"--infile with config must be provided 
for POST action of a build type job [{args.job_name}]" @@ -1108,8 +1131,7 @@ def main() -> int: # upload binaries only for normal builds in PRs upload_binary = ( not pr_info.is_pr - or args.job_name - not in CI_CONFIG.get_builds_for_report(JobNames.BUILD_CHECK_SPECIAL) + or CI.get_job_ci_stage(args.job_name) == CI.WorkflowStages.BUILDS_1 or CiSettings.create_from_run_config(indata).upload_all ) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 9ce8923f58f..8ee0ae54385 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -5,7 +5,8 @@ from enum import Enum from pathlib import Path from typing import Dict, Optional, Any, Union, Sequence, List, Set -from ci_config import JobNames, Build, CI_CONFIG, JobConfig +from ci_config import CI + from ci_utils import is_hex, GHActions from commit_status_helper import CommitStatusData from env_helper import ( @@ -41,7 +42,7 @@ class CiCache: release - for jobs being executed on the release branch including master branch (not a PR branch) """ - _REQUIRED_DIGESTS = [JobNames.DOCS_CHECK, Build.PACKAGE_RELEASE] + _REQUIRED_DIGESTS = [CI.JobNames.DOCS_CHECK, CI.BuildNames.PACKAGE_RELEASE] _S3_CACHE_PREFIX = "CI_cache_v1" _CACHE_BUILD_REPORT_PREFIX = "build_report" _RECORD_FILE_EXTENSION = ".ci" @@ -80,7 +81,7 @@ class CiCache: @classmethod def is_docs_job(cls, job_name: str) -> bool: - return job_name == JobNames.DOCS_CHECK + return job_name == CI.JobNames.DOCS_CHECK @classmethod def is_srcs_job(cls, job_name: str) -> bool: @@ -105,8 +106,8 @@ class CiCache: ): self.enabled = cache_enabled self.jobs_to_skip = [] # type: List[str] - self.jobs_to_wait = {} # type: Dict[str, JobConfig] - self.jobs_to_do = {} # type: Dict[str, JobConfig] + self.jobs_to_wait = {} # type: Dict[str, CI.JobConfig] + self.jobs_to_do = {} # type: Dict[str, CI.JobConfig] self.s3 = s3 self.job_digests = job_digests self.cache_s3_paths = { @@ -127,9 +128,13 @@ class CiCache: @classmethod def calc_digests_and_create( - cls, s3: S3Helper, 
job_configs: Dict[str, JobConfig], cache_enabled: bool = True + cls, + s3: S3Helper, + job_configs: Dict[str, CI.JobConfig], + cache_enabled: bool = True, + dry_run: bool = False, ) -> "CiCache": - job_digester = JobDigester() + job_digester = JobDigester(dry_run=dry_run) digests = {} print("::group::Job Digests") @@ -140,9 +145,7 @@ class CiCache: for job in cls._REQUIRED_DIGESTS: if job not in job_configs: - digest = job_digester.get_job_digest( - CI_CONFIG.get_job_config(job).digest - ) + digest = job_digester.get_job_digest(CI.get_job_config(job).digest) digests[job] = digest print( f" job [{job.rjust(50)}] required for CI Cache has digest [{digest}]" @@ -154,10 +157,10 @@ class CiCache: self, job_digests: Dict[str, str], job_type: JobType ) -> str: if job_type == self.JobType.DOCS: - res = job_digests[JobNames.DOCS_CHECK] + res = job_digests[CI.JobNames.DOCS_CHECK] elif job_type == self.JobType.SRCS: - if Build.PACKAGE_RELEASE in job_digests: - res = job_digests[Build.PACKAGE_RELEASE] + if CI.BuildNames.PACKAGE_RELEASE in job_digests: + res = job_digests[CI.BuildNames.PACKAGE_RELEASE] else: assert False, "BUG, no build job in digest' list" else: @@ -648,7 +651,7 @@ class CiCache: report_path = Path(REPORT_PATH) report_path.mkdir(exist_ok=True, parents=True) path = ( - self._get_record_s3_path(Build.PACKAGE_RELEASE) + self._get_record_s3_path(CI.BuildNames.PACKAGE_RELEASE) + self._CACHE_BUILD_REPORT_PREFIX ) if file_prefix: @@ -664,13 +667,14 @@ class CiCache: def upload_build_report(self, build_result: BuildResult) -> str: result_json_path = build_result.write_json(Path(TEMP_PATH)) s3_path = ( - self._get_record_s3_path(Build.PACKAGE_RELEASE) + result_json_path.name + self._get_record_s3_path(CI.BuildNames.PACKAGE_RELEASE) + + result_json_path.name ) return self.s3.upload_file( bucket=S3_BUILDS_BUCKET, file_path=result_json_path, s3_path=s3_path ) - def await_pending_jobs(self, is_release: bool) -> None: + def await_pending_jobs(self, is_release: bool, 
dry_run: bool = False) -> None: """ await pending jobs to be finished @jobs_with_params - jobs to await. {JOB_NAME: {"batches": [BATCHES...], "num_batches": NUM_BATCHES}} @@ -687,21 +691,14 @@ class CiCache: MAX_JOB_NUM_TO_WAIT = 3 round_cnt = 0 - # FIXME: temporary experiment: lets enable await for PR' workflows awaiting on build' jobs only + # FIXME: temporary experiment: lets enable await for PR' workflows but for a shorter time if not is_release: - MAX_ROUNDS_TO_WAIT = 1 - remove_from_wait = [] - for job in self.jobs_to_wait: - if job not in Build: - remove_from_wait.append(job) - for job in remove_from_wait: - del self.jobs_to_wait[job] + MAX_ROUNDS_TO_WAIT = 3 while ( len(self.jobs_to_wait) > MAX_JOB_NUM_TO_WAIT and round_cnt < MAX_ROUNDS_TO_WAIT ): - await_finished: Set[str] = set() round_cnt += 1 GHActions.print_in_group( f"Wait pending jobs, round [{round_cnt}/{MAX_ROUNDS_TO_WAIT}]:", @@ -713,11 +710,13 @@ class CiCache: expired_sec = 0 start_at = int(time.time()) while expired_sec < TIMEOUT and self.jobs_to_wait: - time.sleep(poll_interval_sec) + await_finished: Set[str] = set() + if not dry_run: + time.sleep(poll_interval_sec) self.update() for job_name, job_config in self.jobs_to_wait.items(): num_batches = job_config.num_batches - job_config = CI_CONFIG.get_job_config(job_name) + job_config = CI.get_job_config(job_name) assert job_config.pending_batches assert job_config.batches pending_batches = list(job_config.pending_batches) @@ -741,12 +740,11 @@ class CiCache: f"Job [{job_name}_[{batch}/{num_batches}]] is not pending anymore" ) job_config.batches.remove(batch) - job_config.pending_batches.remove(batch) else: print( f"NOTE: Job [{job_name}:{batch}] finished failed - do not add to ready" ) - job_config.pending_batches.remove(batch) + job_config.pending_batches.remove(batch) if not job_config.pending_batches: await_finished.add(job_name) @@ -754,23 +752,25 @@ class CiCache: for job in await_finished: self.jobs_to_skip.append(job) del 
self.jobs_to_wait[job] + del self.jobs_to_do[job] - expired_sec = int(time.time()) - start_at - print( - f"...awaiting continues... seconds left [{TIMEOUT - expired_sec}]" - ) - if await_finished: - GHActions.print_in_group( - f"Finished jobs, round [{round_cnt}]: [{list(await_finished)}]", - list(await_finished), - ) + if not dry_run: + expired_sec = int(time.time()) - start_at + print( + f"...awaiting continues... seconds left [{TIMEOUT - expired_sec}]" + ) + else: + # make up for 2 iterations in dry_run + expired_sec += int(TIMEOUT / 2) + 1 GHActions.print_in_group( "Remaining jobs:", [list(self.jobs_to_wait)], ) - def apply(self, job_configs: Dict[str, JobConfig], is_release: bool) -> "CiCache": + def apply( + self, job_configs: Dict[str, CI.JobConfig], is_release: bool + ) -> "CiCache": if not self.enabled: self.jobs_to_do = job_configs return self diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index cb40f7899a4..bef43083a35 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1,750 +1,621 @@ -#!/usr/bin/env python3 - -import logging import random import re from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser -from copy import deepcopy -from dataclasses import dataclass, field -from pathlib import Path -from typing import Callable, Dict, Iterable, List, Literal, Optional, Union +from typing import Dict, Optional, List -from ci_utils import WithIter, normalize_string -from integration_test_images import IMAGES +from ci_utils import normalize_string +from ci_definitions import * -class WorkFlows(metaclass=WithIter): - PULL_REQUEST = "PULL_REQUEST" - MASTER = "MASTER" - BACKPORT = "BACKPORT" - RELEASE = "RELEASE" - SYNC = "SYNC" - - -class CIStages(metaclass=WithIter): - NA = "UNKNOWN" - BUILDS_1 = "Builds_1" - BUILDS_2 = "Builds_2" - TESTS_1 = "Tests_1" - TESTS_2 = "Tests_2" - TESTS_3 = "Tests_3" - - -class Runners(metaclass=WithIter): - BUILDER = "builder" - STYLE_CHECKER = "style-checker" - STYLE_CHECKER_ARM = 
"style-checker-aarch64" - FUNC_TESTER = "func-tester" - FUNC_TESTER_ARM = "func-tester-aarch64" - STRESS_TESTER = "stress-tester" - FUZZER_UNIT_TESTER = "fuzzer-unit-tester" - - -class CILabels(metaclass=WithIter): - """ - Label names or commit tokens in normalized form - """ - - DO_NOT_TEST_LABEL = "do_not_test" - NO_MERGE_COMMIT = "no_merge_commit" - NO_CI_CACHE = "no_ci_cache" - # to upload all binaries from build jobs - UPLOAD_ALL_ARTIFACTS = "upload_all" - CI_SET_SYNC = "ci_set_sync" - CI_SET_ARM = "ci_set_arm" - CI_SET_REQUIRED = "ci_set_required" - CI_SET_NON_REQUIRED = "ci_set_non_required" - CI_SET_OLD_ANALYZER = "ci_set_old_analyzer" - - libFuzzer = "libFuzzer" - - -class Build(metaclass=WithIter): - PACKAGE_RELEASE = "package_release" - PACKAGE_AARCH64 = "package_aarch64" - PACKAGE_ASAN = "package_asan" - PACKAGE_UBSAN = "package_ubsan" - PACKAGE_TSAN = "package_tsan" - PACKAGE_MSAN = "package_msan" - PACKAGE_DEBUG = "package_debug" - PACKAGE_RELEASE_COVERAGE = "package_release_coverage" - BINARY_RELEASE = "binary_release" - BINARY_TIDY = "binary_tidy" - BINARY_DARWIN = "binary_darwin" - BINARY_AARCH64 = "binary_aarch64" - BINARY_AARCH64_V80COMPAT = "binary_aarch64_v80compat" - BINARY_FREEBSD = "binary_freebsd" - BINARY_DARWIN_AARCH64 = "binary_darwin_aarch64" - BINARY_PPC64LE = "binary_ppc64le" - BINARY_AMD64_COMPAT = "binary_amd64_compat" - BINARY_AMD64_MUSL = "binary_amd64_musl" - BINARY_RISCV64 = "binary_riscv64" - BINARY_S390X = "binary_s390x" - BINARY_LOONGARCH64 = "binary_loongarch64" - FUZZERS = "fuzzers" - - -class JobNames(metaclass=WithIter): - STYLE_CHECK = "Style check" - FAST_TEST = "Fast test" - DOCKER_SERVER = "Docker server image" - DOCKER_KEEPER = "Docker keeper image" - INSTALL_TEST_AMD = "Install packages (amd64)" - INSTALL_TEST_ARM = "Install packages (arm64)" - - STATELESS_TEST_DEBUG = "Stateless tests (debug)" - STATELESS_TEST_RELEASE = "Stateless tests (release)" - STATELESS_TEST_RELEASE_COVERAGE = "Stateless tests (coverage)" - 
STATELESS_TEST_AARCH64 = "Stateless tests (aarch64)" - STATELESS_TEST_ASAN = "Stateless tests (asan)" - STATELESS_TEST_TSAN = "Stateless tests (tsan)" - STATELESS_TEST_MSAN = "Stateless tests (msan)" - STATELESS_TEST_UBSAN = "Stateless tests (ubsan)" - STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE = ( - "Stateless tests (release, old analyzer, s3, DatabaseReplicated)" - ) - # merged into STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: - # STATELESS_TEST_OLD_ANALYZER_RELEASE = "Stateless tests (release, analyzer)" - # STATELESS_TEST_DB_REPL_RELEASE = "Stateless tests (release, DatabaseReplicated)" - # STATELESS_TEST_S3_RELEASE = "Stateless tests (release, s3 storage)" - STATELESS_TEST_S3_DEBUG = "Stateless tests (debug, s3 storage)" - STATELESS_TEST_S3_TSAN = "Stateless tests (tsan, s3 storage)" - STATELESS_TEST_AZURE_ASAN = "Stateless tests (azure, asan)" - STATELESS_TEST_FLAKY_ASAN = "Stateless tests flaky check (asan)" - - STATEFUL_TEST_DEBUG = "Stateful tests (debug)" - STATEFUL_TEST_RELEASE = "Stateful tests (release)" - STATEFUL_TEST_RELEASE_COVERAGE = "Stateful tests (coverage)" - STATEFUL_TEST_AARCH64 = "Stateful tests (aarch64)" - STATEFUL_TEST_ASAN = "Stateful tests (asan)" - STATEFUL_TEST_TSAN = "Stateful tests (tsan)" - STATEFUL_TEST_MSAN = "Stateful tests (msan)" - STATEFUL_TEST_UBSAN = "Stateful tests (ubsan)" - STATEFUL_TEST_PARALLEL_REPL_RELEASE = "Stateful tests (release, ParallelReplicas)" - STATEFUL_TEST_PARALLEL_REPL_DEBUG = "Stateful tests (debug, ParallelReplicas)" - STATEFUL_TEST_PARALLEL_REPL_ASAN = "Stateful tests (asan, ParallelReplicas)" - STATEFUL_TEST_PARALLEL_REPL_MSAN = "Stateful tests (msan, ParallelReplicas)" - STATEFUL_TEST_PARALLEL_REPL_UBSAN = "Stateful tests (ubsan, ParallelReplicas)" - STATEFUL_TEST_PARALLEL_REPL_TSAN = "Stateful tests (tsan, ParallelReplicas)" - - STRESS_TEST_ASAN = "Stress test (asan)" - STRESS_TEST_TSAN = "Stress test (tsan)" - STRESS_TEST_UBSAN = "Stress test (ubsan)" - STRESS_TEST_MSAN = "Stress test 
(msan)" - STRESS_TEST_DEBUG = "Stress test (debug)" - STRESS_TEST_AZURE_TSAN = "Stress test (azure, tsan)" - STRESS_TEST_AZURE_MSAN = "Stress test (azure, msan)" - - INTEGRATION_TEST = "Integration tests (release)" - INTEGRATION_TEST_ASAN = "Integration tests (asan)" - INTEGRATION_TEST_ASAN_OLD_ANALYZER = "Integration tests (asan, old analyzer)" - INTEGRATION_TEST_TSAN = "Integration tests (tsan)" - INTEGRATION_TEST_ARM = "Integration tests (aarch64)" - INTEGRATION_TEST_FLAKY = "Integration tests flaky check (asan)" - - UPGRADE_TEST_DEBUG = "Upgrade check (debug)" - UPGRADE_TEST_ASAN = "Upgrade check (asan)" - UPGRADE_TEST_TSAN = "Upgrade check (tsan)" - UPGRADE_TEST_MSAN = "Upgrade check (msan)" - - UNIT_TEST = "Unit tests (release)" - UNIT_TEST_ASAN = "Unit tests (asan)" - UNIT_TEST_MSAN = "Unit tests (msan)" - UNIT_TEST_TSAN = "Unit tests (tsan)" - UNIT_TEST_UBSAN = "Unit tests (ubsan)" - - AST_FUZZER_TEST_DEBUG = "AST fuzzer (debug)" - AST_FUZZER_TEST_ASAN = "AST fuzzer (asan)" - AST_FUZZER_TEST_MSAN = "AST fuzzer (msan)" - AST_FUZZER_TEST_TSAN = "AST fuzzer (tsan)" - AST_FUZZER_TEST_UBSAN = "AST fuzzer (ubsan)" - - JEPSEN_KEEPER = "ClickHouse Keeper Jepsen" - JEPSEN_SERVER = "ClickHouse Server Jepsen" - - PERFORMANCE_TEST_AMD64 = "Performance Comparison" - PERFORMANCE_TEST_ARM64 = "Performance Comparison Aarch64" - - SQL_LOGIC_TEST = "Sqllogic test (release)" - - SQLANCER = "SQLancer (release)" - SQLANCER_DEBUG = "SQLancer (debug)" - SQLTEST = "SQLTest" - - COMPATIBILITY_TEST = "Compatibility check (amd64)" - COMPATIBILITY_TEST_ARM = "Compatibility check (aarch64)" - - CLICKBENCH_TEST = "ClickBench (amd64)" - CLICKBENCH_TEST_ARM = "ClickBench (aarch64)" - - LIBFUZZER_TEST = "libFuzzer tests" - - BUILD_CHECK = "ClickHouse build check" - BUILD_CHECK_SPECIAL = "ClickHouse special build check" - - DOCS_CHECK = "Docs check" - BUGFIX_VALIDATE = "Bugfix validation" - - -class StatusNames(metaclass=WithIter): - "Class with statuses that aren't related to particular 
jobs" - CI = "CI running" - MERGEABLE = "Mergeable Check" - SYNC = "A Sync" - - -# dynamically update JobName with Build jobs -for attr_name in dir(Build): - if not attr_name.startswith("__") and not callable(getattr(Build, attr_name)): - setattr(JobNames, attr_name, getattr(Build, attr_name)) - - -@dataclass -class DigestConfig: - # all files, dirs to include into digest, glob supported - include_paths: List[Union[str, Path]] = field(default_factory=list) - # file suffixes to exclude from digest - exclude_files: List[str] = field(default_factory=list) - # directories to exclude from digest - exclude_dirs: List[Union[str, Path]] = field(default_factory=list) - # docker names to include into digest - docker: List[str] = field(default_factory=list) - # git submodules digest - git_submodules: bool = False - - -@dataclass -class LabelConfig: - """ - configures different CI scenarios per GH label - """ - - run_jobs: Iterable[str] = frozenset() - - -@dataclass -class JobConfig: - """ - contains config parameters for job execution in CI workflow - """ - - # configures digest calculation for the job - digest: DigestConfig = field(default_factory=DigestConfig) - # will be triggered for the job if omitted in CI workflow yml - run_command: str = "" - # job timeout, seconds - timeout: Optional[int] = None - # sets number of batches for a multi-batch job - num_batches: int = 1 - # label that enables job in CI, if set digest isn't used - run_by_label: str = "" - # to run always regardless of the job digest or/and label - run_always: bool = False - # if the job needs to be run on the release branch, including master (building packages, docker server). - # NOTE: Subsequent runs on the same branch with the similar digest are still considered skip-able. 
- required_on_release_branch: bool = False - # job is for pr workflow only - pr_only: bool = False - # job is for release/master branches only - release_only: bool = False - # to randomly pick and run one job among jobs in the same @random_bucket (PR branches only). - random_bucket: str = "" - # Do not set it. A list of batches to run. It will be set in runtime in accordance with ci cache and ci settings - batches: Optional[List[int]] = None - # Do not set it. A list of batches to await. It will be set in runtime in accordance with ci cache and ci settings - pending_batches: Optional[List[int]] = None - - -builds_job_config = JobConfig( - required_on_release_branch=True, - digest=DigestConfig( - include_paths=[ - "./src", - "./contrib/*-cmake", - "./contrib/consistent-hashing", - "./contrib/murmurhash", - "./contrib/libfarmhash", - "./contrib/pdqsort", - "./contrib/cityhash102", - "./contrib/sparse-checkout", - "./contrib/libmetrohash", - "./contrib/update-submodules.sh", - "./contrib/CMakeLists.txt", - "./CMakeLists.txt", - "./PreLoad.cmake", - "./cmake", - "./base", - "./programs", - "./packages", - "./docker/packager/packager", - "./rust", - # FIXME: This is a WA to rebuild the CH and recreate the Performance.tar.zst artifact - # when there are changes in performance test scripts. 
- # Due to the current design of the perf test we need to rebuild CH when the performance test changes, - # otherwise the changes will not be visible in the PerformanceTest job in CI - "./tests/performance", - ], - exclude_files=[".md"], - docker=["clickhouse/binary-builder"], - git_submodules=True, - ), - run_command="build_check.py $BUILD_NAME", -) -fuzzer_build_job_config = deepcopy(builds_job_config) -fuzzer_build_job_config.run_by_label = CILabels.libFuzzer - - -@dataclass -class BuildConfig: - name: str - compiler: str - package_type: Literal["deb", "binary", "fuzzers"] - additional_pkgs: bool = False - debug_build: bool = False - coverage: bool = False - sanitizer: str = "" - tidy: bool = False - # sparse_checkout is needed only to test the option itself. - # No particular sense to use it in every build, since it slows down the job. - sparse_checkout: bool = False - comment: str = "" - static_binary_name: str = "" - job_config: JobConfig = field(default_factory=lambda: deepcopy(builds_job_config)) - - def export_env(self, export: bool = False) -> str: - def process(field_name: str, field: Union[bool, str]) -> str: - if isinstance(field, bool): - field = str(field).lower() - elif not isinstance(field, str): - field = "" - if export: - return f"export BUILD_{field_name.upper()}={repr(field)}" - return f"BUILD_{field_name.upper()}={field}" - - return "\n".join(process(k, v) for k, v in self.__dict__.items()) - - -@dataclass -class BuildReportConfig: - builds: List[str] - job_config: JobConfig = field( - default_factory=lambda: JobConfig( - run_command='build_report_check.py "$CHECK_NAME"', - digest=DigestConfig( - include_paths=[ - "./tests/ci/build_report_check.py", - "./tests/ci/upload_result_helper.py", - ], - ), - ) - ) - - -@dataclass -class TestConfig: - required_build: str - job_config: JobConfig = field(default_factory=JobConfig) - - -BuildConfigs = Dict[str, BuildConfig] -BuildsReportConfig = Dict[str, BuildReportConfig] -TestConfigs = Dict[str, 
TestConfig] -LabelConfigs = Dict[str, LabelConfig] - -# common digests configs -compatibility_check_digest = DigestConfig( - include_paths=["./tests/ci/compatibility_check.py"], - docker=["clickhouse/test-old-ubuntu", "clickhouse/test-old-centos"], -) -install_check_digest = DigestConfig( - include_paths=["./tests/ci/install_check.py"], - docker=["clickhouse/install-deb-test", "clickhouse/install-rpm-test"], -) -stateless_check_digest = DigestConfig( - include_paths=[ - "./tests/ci/functional_test_check.py", - "./tests/queries/0_stateless/", - "./tests/clickhouse-test", - "./tests/config", - "./tests/*.txt", - ], - exclude_files=[".md"], - docker=["clickhouse/stateless-test"], -) -stateful_check_digest = DigestConfig( - include_paths=[ - "./tests/ci/functional_test_check.py", - "./tests/queries/1_stateful/", - "./tests/clickhouse-test", - "./tests/config", - "./tests/*.txt", - ], - exclude_files=[".md"], - docker=["clickhouse/stateful-test"], -) - -stress_check_digest = DigestConfig( - include_paths=[ - "./tests/queries/0_stateless/", - "./tests/queries/1_stateful/", - "./tests/clickhouse-test", - "./tests/config", - "./tests/*.txt", - ], - exclude_files=[".md"], - docker=["clickhouse/stress-test"], -) -# FIXME: which tests are upgrade? just python? 
-upgrade_check_digest = DigestConfig( - include_paths=["./tests/ci/upgrade_check.py"], - exclude_files=[".md"], - docker=["clickhouse/upgrade-check"], -) -integration_check_digest = DigestConfig( - include_paths=[ - "./tests/ci/integration_test_check.py", - "./tests/ci/integration_tests_runner.py", - "./tests/integration/", - ], - exclude_files=[".md"], - docker=IMAGES.copy(), -) - -ast_fuzzer_check_digest = DigestConfig( - # include_paths=["./tests/ci/ast_fuzzer_check.py"], - # exclude_files=[".md"], - # docker=["clickhouse/fuzzer"], -) -unit_check_digest = DigestConfig( - include_paths=["./tests/ci/unit_tests_check.py"], - exclude_files=[".md"], - docker=["clickhouse/unit-test"], -) -perf_check_digest = DigestConfig( - include_paths=[ - "./tests/ci/performance_comparison_check.py", - "./tests/performance/", - ], - exclude_files=[".md"], - docker=["clickhouse/performance-comparison"], -) -sqllancer_check_digest = DigestConfig( - # include_paths=["./tests/ci/sqlancer_check.py"], - # exclude_files=[".md"], - # docker=["clickhouse/sqlancer-test"], -) -sqllogic_check_digest = DigestConfig( - include_paths=["./tests/ci/sqllogic_test.py"], - exclude_files=[".md"], - docker=["clickhouse/sqllogic-test"], -) -sqltest_check_digest = DigestConfig( - include_paths=["./tests/ci/sqltest.py"], - exclude_files=[".md"], - docker=["clickhouse/sqltest"], -) -bugfix_validate_check = DigestConfig( - include_paths=[ - "./tests/queries/0_stateless/", - "./tests/ci/integration_test_check.py", - "./tests/ci/functional_test_check.py", - "./tests/ci/bugfix_validate_check.py", - ], - exclude_files=[".md"], - docker=IMAGES.copy() - + [ - "clickhouse/stateless-test", - ], -) -# common test params -docker_server_job_config = JobConfig( - required_on_release_branch=True, - run_command='docker_server.py --check-name "$CHECK_NAME" --release-type head --allow-build-reuse', - digest=DigestConfig( - include_paths=[ - "tests/ci/docker_server.py", - "./docker/server", - ] - ), -) 
-compatibility_test_common_params = { - "digest": compatibility_check_digest, - "run_command": "compatibility_check.py", -} -stateless_test_common_params = { - "digest": stateless_check_digest, - "run_command": 'functional_test_check.py "$CHECK_NAME"', - "timeout": 10800, -} -stateful_test_common_params = { - "digest": stateful_check_digest, - "run_command": 'functional_test_check.py "$CHECK_NAME"', - "timeout": 3600, -} -stress_test_common_params = { - "digest": stress_check_digest, - "run_command": "stress_check.py", - "timeout": 9000, -} -upgrade_test_common_params = { - "digest": upgrade_check_digest, - "run_command": "upgrade_check.py", -} -astfuzzer_test_common_params = { - "digest": ast_fuzzer_check_digest, - "run_command": "ast_fuzzer_check.py", - "run_always": True, -} -integration_test_common_params = { - "digest": integration_check_digest, - "run_command": 'integration_test_check.py "$CHECK_NAME"', -} -unit_test_common_params = { - "digest": unit_check_digest, - "run_command": "unit_tests_check.py", -} -perf_test_common_params = { - "digest": perf_check_digest, - "run_command": "performance_comparison_check.py", -} -sqllancer_test_common_params = JobConfig( - digest=sqllancer_check_digest, - run_command="sqlancer_check.py", - release_only=True, - run_always=True, -) -sqllogic_test_params = JobConfig( - digest=sqllogic_check_digest, - run_command="sqllogic_test.py", - timeout=10800, - release_only=True, -) -sql_test_params = JobConfig( - digest=sqltest_check_digest, - run_command="sqltest.py", - timeout=10800, - release_only=True, -) -clickbench_test_params = { - "digest": DigestConfig( - include_paths=[ - "tests/ci/clickbench.py", - ], - docker=["clickhouse/clickbench"], - ), - "run_command": 'clickbench.py "$CHECK_NAME"', - "timeout": 900, -} -install_test_params = JobConfig( - digest=install_check_digest, - run_command='install_check.py "$CHECK_NAME"', - timeout=900, -) - - -@dataclass -class CIConfig: +class CI: """ Contains configs for all jobs in 
the CI pipeline each config item in the below dicts should be an instance of JobConfig class or inherited from it """ - build_config: BuildConfigs - builds_report_config: BuildsReportConfig - test_configs: TestConfigs - other_jobs_configs: TestConfigs - label_configs: LabelConfigs + # reimport types to CI class so that they visible as CI.* and mypy is happy + # pylint:disable=useless-import-alias,reimported,import-outside-toplevel + from ci_definitions import BuildConfig as BuildConfig + from ci_definitions import DigestConfig as DigestConfig + from ci_definitions import JobConfig as JobConfig + from ci_definitions import CheckDescription as CheckDescription + from ci_definitions import Tags as Tags + from ci_definitions import JobNames as JobNames + from ci_definitions import BuildNames as BuildNames + from ci_definitions import StatusNames as StatusNames + from ci_definitions import CHECK_DESCRIPTIONS as CHECK_DESCRIPTIONS + from ci_definitions import REQUIRED_CHECKS as REQUIRED_CHECKS + from ci_definitions import SyncState as SyncState + from ci_definitions import MQ_JOBS as MQ_JOBS + from ci_definitions import WorkflowStages as WorkflowStages + from ci_definitions import Runners as Runners # Jobs that run for doc related updates _DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK] - # Jobs that run in Merge Queue if it's enabled - _MQ_JOBS = [ - JobNames.STYLE_CHECK, - JobNames.FAST_TEST, - Build.BINARY_RELEASE, - JobNames.UNIT_TEST, - ] + TAG_CONFIGS = { + Tags.DO_NOT_TEST_LABEL: LabelConfig(run_jobs=[JobNames.STYLE_CHECK]), + Tags.CI_SET_ARM: LabelConfig( + run_jobs=[ + JobNames.STYLE_CHECK, + BuildNames.PACKAGE_AARCH64, + JobNames.INTEGRATION_TEST_ARM, + ] + ), + Tags.CI_SET_REQUIRED: LabelConfig(run_jobs=REQUIRED_CHECKS), + Tags.CI_SET_BUILDS: LabelConfig( + run_jobs=[JobNames.STYLE_CHECK, JobNames.BUILD_CHECK] + + [build for build in BuildNames if build != BuildNames.FUZZERS] + ), + Tags.CI_SET_NON_REQUIRED: LabelConfig( + run_jobs=[job for job 
in JobNames if job not in REQUIRED_CHECKS] + ), + Tags.CI_SET_OLD_ANALYZER: LabelConfig( + run_jobs=[ + JobNames.STYLE_CHECK, + JobNames.FAST_TEST, + BuildNames.PACKAGE_RELEASE, + BuildNames.PACKAGE_ASAN, + JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE, + JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER, + ] + ), + Tags.CI_SET_SYNC: LabelConfig( + run_jobs=[ + BuildNames.PACKAGE_ASAN, + JobNames.STYLE_CHECK, + JobNames.BUILD_CHECK, + JobNames.UNIT_TEST_ASAN, + JobNames.STATEFUL_TEST_ASAN, + ] + ), + } - def get_label_config(self, label_name: str) -> Optional[LabelConfig]: - for label, config in self.label_configs.items(): + JOB_CONFIGS: Dict[str, JobConfig] = { + BuildNames.PACKAGE_RELEASE: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_RELEASE, + compiler="clang-18", + package_type="deb", + static_binary_name="amd64", + additional_pkgs=True, + ) + ), + BuildNames.PACKAGE_AARCH64: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_AARCH64, + compiler="clang-18-aarch64", + package_type="deb", + static_binary_name="aarch64", + additional_pkgs=True, + ) + ), + BuildNames.PACKAGE_ASAN: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_ASAN, + compiler="clang-18", + sanitizer="address", + package_type="deb", + ), + ), + BuildNames.PACKAGE_UBSAN: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_UBSAN, + compiler="clang-18", + sanitizer="undefined", + package_type="deb", + ), + ), + BuildNames.PACKAGE_TSAN: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_TSAN, + compiler="clang-18", + sanitizer="thread", + package_type="deb", + ), + ), + BuildNames.PACKAGE_MSAN: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_MSAN, + compiler="clang-18", + sanitizer="memory", + package_type="deb", + ), + ), + 
BuildNames.PACKAGE_DEBUG: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_DEBUG, + compiler="clang-18", + debug_build=True, + package_type="deb", + sparse_checkout=True, # Check that it works with at least one build, see also update-submodules.sh + ), + ), + BuildNames.PACKAGE_RELEASE_COVERAGE: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.PACKAGE_RELEASE_COVERAGE, + compiler="clang-18", + coverage=True, + package_type="deb", + ), + ), + BuildNames.BINARY_RELEASE: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_RELEASE, + compiler="clang-18", + package_type="binary", + ), + ), + BuildNames.BINARY_TIDY: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_TIDY, + compiler="clang-18", + debug_build=True, + package_type="binary", + static_binary_name="debug-amd64", + tidy=True, + comment="clang-tidy is used for static analysis", + ), + ), + BuildNames.BINARY_DARWIN: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_DARWIN, + compiler="clang-18-darwin", + package_type="binary", + static_binary_name="macos", + ), + ), + BuildNames.BINARY_AARCH64: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_AARCH64, + compiler="clang-18-aarch64", + package_type="binary", + ), + ), + BuildNames.BINARY_AARCH64_V80COMPAT: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_AARCH64_V80COMPAT, + compiler="clang-18-aarch64-v80compat", + package_type="binary", + static_binary_name="aarch64v80compat", + comment="For ARMv8.1 and older", + ), + ), + BuildNames.BINARY_FREEBSD: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_FREEBSD, + compiler="clang-18-freebsd", + package_type="binary", + static_binary_name="freebsd", + ), + ), + 
BuildNames.BINARY_DARWIN_AARCH64: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_DARWIN_AARCH64, + compiler="clang-18-darwin-aarch64", + package_type="binary", + static_binary_name="macos-aarch64", + ), + ), + BuildNames.BINARY_PPC64LE: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_PPC64LE, + compiler="clang-18-ppc64le", + package_type="binary", + static_binary_name="powerpc64le", + ), + ), + BuildNames.BINARY_AMD64_COMPAT: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_AMD64_COMPAT, + compiler="clang-18-amd64-compat", + package_type="binary", + static_binary_name="amd64compat", + comment="SSE2-only build", + ), + ), + BuildNames.BINARY_AMD64_MUSL: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_AMD64_MUSL, + compiler="clang-18-amd64-musl", + package_type="binary", + static_binary_name="amd64musl", + comment="Build with Musl", + ), + ), + BuildNames.BINARY_RISCV64: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_RISCV64, + compiler="clang-18-riscv64", + package_type="binary", + static_binary_name="riscv64", + ), + ), + BuildNames.BINARY_S390X: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_S390X, + compiler="clang-18-s390x", + package_type="binary", + static_binary_name="s390x", + ), + ), + BuildNames.BINARY_LOONGARCH64: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.BINARY_LOONGARCH64, + compiler="clang-18-loongarch64", + package_type="binary", + static_binary_name="loongarch64", + ), + ), + BuildNames.FUZZERS: CommonJobConfigs.BUILD.with_properties( + build_config=BuildConfig( + name=BuildNames.FUZZERS, + compiler="clang-18", + package_type="fuzzers", + ), + run_by_label=Tags.libFuzzer, + ), + JobNames.BUILD_CHECK: 
CommonJobConfigs.BUILD_REPORT.with_properties(), + JobNames.INSTALL_TEST_AMD: CommonJobConfigs.INSTALL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE] + ), + JobNames.INSTALL_TEST_ARM: CommonJobConfigs.INSTALL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_AARCH64], + runner_type=Runners.STYLE_CHECKER_ARM, + ), + JobNames.STATEFUL_TEST_ASAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN] + ), + JobNames.STATEFUL_TEST_TSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN] + ), + JobNames.STATEFUL_TEST_MSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN] + ), + JobNames.STATEFUL_TEST_UBSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_UBSAN] + ), + JobNames.STATEFUL_TEST_DEBUG: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG] + ), + JobNames.STATEFUL_TEST_RELEASE: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE] + ), + JobNames.STATEFUL_TEST_RELEASE_COVERAGE: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE_COVERAGE] + ), + JobNames.STATEFUL_TEST_AARCH64: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_AARCH64], + runner_type=Runners.FUNC_TESTER_ARM, + ), + JobNames.STATEFUL_TEST_PARALLEL_REPL_RELEASE: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE] + ), + JobNames.STATEFUL_TEST_PARALLEL_REPL_DEBUG: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG] + ), + JobNames.STATEFUL_TEST_PARALLEL_REPL_ASAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], + random_bucket="parrepl_with_sanitizer", + ), + JobNames.STATEFUL_TEST_PARALLEL_REPL_MSAN: 
CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN], + random_bucket="parrepl_with_sanitizer", + ), + JobNames.STATEFUL_TEST_PARALLEL_REPL_UBSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_UBSAN], + random_bucket="parrepl_with_sanitizer", + ), + JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], + random_bucket="parrepl_with_sanitizer", + ), + JobNames.STATELESS_TEST_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4 + ), + JobNames.STATELESS_TEST_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], num_batches=5 + ), + JobNames.STATELESS_TEST_MSAN: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN], num_batches=6 + ), + JobNames.STATELESS_TEST_UBSAN: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_UBSAN], num_batches=2 + ), + JobNames.STATELESS_TEST_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=5 + ), + JobNames.STATELESS_TEST_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], + ), + JobNames.STATELESS_TEST_RELEASE_COVERAGE: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE_COVERAGE], num_batches=6 + ), + JobNames.STATELESS_TEST_AARCH64: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_AARCH64], + runner_type=Runners.FUNC_TESTER_ARM, + ), + JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 + ), + JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( + 
required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=6 + ), + JobNames.STATELESS_TEST_AZURE_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4, release_only=True + ), + JobNames.STATELESS_TEST_S3_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], + num_batches=5, + ), + JobNames.STRESS_TEST_DEBUG: CommonJobConfigs.STRESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG], + ), + JobNames.STRESS_TEST_TSAN: CommonJobConfigs.STRESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], + ), + JobNames.STRESS_TEST_ASAN: CommonJobConfigs.STRESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], + random_bucket="stress_with_sanitizer", + ), + JobNames.STRESS_TEST_UBSAN: CommonJobConfigs.STRESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_UBSAN], + random_bucket="stress_with_sanitizer", + ), + JobNames.STRESS_TEST_MSAN: CommonJobConfigs.STRESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN], + random_bucket="stress_with_sanitizer", + ), + JobNames.STRESS_TEST_AZURE_TSAN: CommonJobConfigs.STRESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], release_only=True + ), + JobNames.STRESS_TEST_AZURE_MSAN: CommonJobConfigs.STRESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN], release_only=True + ), + JobNames.UPGRADE_TEST_ASAN: CommonJobConfigs.UPGRADE_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], + random_bucket="upgrade_with_sanitizer", + pr_only=True, + ), + JobNames.UPGRADE_TEST_TSAN: CommonJobConfigs.UPGRADE_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], + random_bucket="upgrade_with_sanitizer", + pr_only=True, + ), + JobNames.UPGRADE_TEST_MSAN: CommonJobConfigs.UPGRADE_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN], + random_bucket="upgrade_with_sanitizer", + pr_only=True, + ), + 
JobNames.UPGRADE_TEST_DEBUG: CommonJobConfigs.UPGRADE_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG], pr_only=True + ), + JobNames.INTEGRATION_TEST_ASAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], release_only=True, num_batches=4 + ), + JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=6 + ), + JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 + ), + JobNames.INTEGRATION_TEST_ARM: CommonJobConfigs.INTEGRATION_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_AARCH64], + num_batches=6, + runner_type=Runners.FUNC_TESTER_ARM, + ), + JobNames.INTEGRATION_TEST: CommonJobConfigs.INTEGRATION_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], + num_batches=4, + release_only=True, + ), + JobNames.INTEGRATION_TEST_FLAKY: CommonJobConfigs.INTEGRATION_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True + ), + JobNames.COMPATIBILITY_TEST: CommonJobConfigs.COMPATIBILITY_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], + required_on_release_branch=True, + ), + JobNames.COMPATIBILITY_TEST_ARM: CommonJobConfigs.COMPATIBILITY_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_AARCH64], + required_on_release_branch=True, + runner_type=Runners.STYLE_CHECKER_ARM, + ), + JobNames.UNIT_TEST: CommonJobConfigs.UNIT_TEST.with_properties( + required_builds=[BuildNames.BINARY_RELEASE], + ), + JobNames.UNIT_TEST_ASAN: CommonJobConfigs.UNIT_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], + ), + JobNames.UNIT_TEST_MSAN: CommonJobConfigs.UNIT_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN], + ), + JobNames.UNIT_TEST_TSAN: CommonJobConfigs.UNIT_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], + 
), + JobNames.UNIT_TEST_UBSAN: CommonJobConfigs.UNIT_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_UBSAN], + ), + JobNames.AST_FUZZER_TEST_DEBUG: CommonJobConfigs.ASTFUZZER_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG], + ), + JobNames.AST_FUZZER_TEST_ASAN: CommonJobConfigs.ASTFUZZER_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], + ), + JobNames.AST_FUZZER_TEST_MSAN: CommonJobConfigs.ASTFUZZER_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_MSAN], + ), + JobNames.AST_FUZZER_TEST_TSAN: CommonJobConfigs.ASTFUZZER_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_TSAN], + ), + JobNames.AST_FUZZER_TEST_UBSAN: CommonJobConfigs.ASTFUZZER_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_UBSAN], + ), + JobNames.STATELESS_TEST_FLAKY_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True, timeout=3600 + ), + JobNames.JEPSEN_KEEPER: JobConfig( + required_builds=[BuildNames.BINARY_RELEASE], + run_by_label="jepsen-test", + run_command="jepsen_check.py keeper", + runner_type=Runners.STYLE_CHECKER_ARM, + ), + JobNames.JEPSEN_SERVER: JobConfig( + required_builds=[BuildNames.BINARY_RELEASE], + run_by_label="jepsen-test", + run_command="jepsen_check.py server", + runner_type=Runners.STYLE_CHECKER_ARM, + ), + JobNames.PERFORMANCE_TEST_AMD64: CommonJobConfigs.PERF_TESTS.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 + ), + JobNames.PERFORMANCE_TEST_ARM64: CommonJobConfigs.PERF_TESTS.with_properties( + required_builds=[BuildNames.PACKAGE_AARCH64], + num_batches=4, + run_by_label="pr-performance", + runner_type=Runners.FUNC_TESTER_ARM, + ), + JobNames.SQLANCER: CommonJobConfigs.SQLLANCER_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], + ), + JobNames.SQLANCER_DEBUG: CommonJobConfigs.SQLLANCER_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_DEBUG], + ), + JobNames.SQL_LOGIC_TEST: 
CommonJobConfigs.SQLLOGIC_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], + ), + JobNames.SQLTEST: CommonJobConfigs.SQL_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], + ), + JobNames.CLICKBENCH_TEST: CommonJobConfigs.CLICKBENCH_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE], + ), + JobNames.CLICKBENCH_TEST_ARM: CommonJobConfigs.CLICKBENCH_TEST.with_properties( + required_builds=[BuildNames.PACKAGE_AARCH64], + runner_type=Runners.FUNC_TESTER_ARM, + ), + JobNames.LIBFUZZER_TEST: JobConfig( + required_builds=[BuildNames.FUZZERS], + run_by_label=Tags.libFuzzer, + timeout=10800, + run_command='libfuzzer_test_check.py "$CHECK_NAME"', + runner_type=Runners.STYLE_CHECKER, + ), + JobNames.DOCKER_SERVER: CommonJobConfigs.DOCKER_SERVER.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE] + ), + JobNames.DOCKER_KEEPER: CommonJobConfigs.DOCKER_SERVER.with_properties( + required_builds=[BuildNames.PACKAGE_RELEASE] + ), + JobNames.DOCS_CHECK: JobConfig( + digest=DigestConfig( + include_paths=["**/*.md", "./docs", "tests/ci/docs_check.py"], + docker=["clickhouse/docs-builder"], + ), + run_command="docs_check.py", + runner_type=Runners.FUNC_TESTER, + ), + JobNames.FAST_TEST: JobConfig( + pr_only=True, + digest=DigestConfig( + include_paths=["./tests/queries/0_stateless/"], + exclude_files=[".md"], + docker=["clickhouse/fasttest"], + ), + timeout=2400, + runner_type=Runners.BUILDER, + ), + JobNames.STYLE_CHECK: JobConfig( + run_always=True, + runner_type=Runners.STYLE_CHECKER_ARM, + ), + JobNames.BUGFIX_VALIDATE: JobConfig( + run_by_label="pr-bugfix", + run_command="bugfix_validate_check.py", + timeout=900, + runner_type=Runners.STYLE_CHECKER, + ), + } + + @classmethod + def get_tag_config(cls, label_name: str) -> Optional[LabelConfig]: + for label, config in cls.TAG_CONFIGS.items(): if normalize_string(label_name) == normalize_string(label): return config return None - def get_job_ci_stage(self, job_name: 
str) -> str: + @classmethod + def get_job_ci_stage(cls, job_name: str, non_blocking_ci: bool = False) -> str: if job_name in [ JobNames.STYLE_CHECK, JobNames.FAST_TEST, + JobNames.JEPSEN_SERVER, JobNames.JEPSEN_KEEPER, JobNames.BUILD_CHECK, - JobNames.BUILD_CHECK_SPECIAL, ]: - # FIXME: we can't currently handle Jepsen in the Stage as it's job has concurrency directive - # BUILD_CHECK and BUILD_CHECK_SPECIAL runs not in stage because we need them even if Builds stage failed - return CIStages.NA + return WorkflowStages.NA + stage_type = None - if self.is_build_job(job_name): - stage_type = CIStages.BUILDS_1 - if job_name in CI_CONFIG.get_builds_for_report( - JobNames.BUILD_CHECK_SPECIAL - ): - # special builds go to Build_2 stage to not delay Builds_1/Test_1 - stage_type = CIStages.BUILDS_2 - elif self.is_docs_job(job_name): - stage_type = CIStages.TESTS_1 - elif job_name == JobNames.BUILD_CHECK_SPECIAL: - stage_type = CIStages.TESTS_2 - elif self.is_test_job(job_name): - if job_name in CI_CONFIG.test_configs: - required_build = CI_CONFIG.test_configs[job_name].required_build - assert required_build - if required_build in CI_CONFIG.get_builds_for_report( - JobNames.BUILD_CHECK - ): - stage_type = CIStages.TESTS_1 - else: - stage_type = CIStages.TESTS_2 + if cls.is_build_job(job_name): + for _job, config in cls.JOB_CONFIGS.items(): + if config.required_builds and job_name in config.required_builds: + stage_type = WorkflowStages.BUILDS_1 + break else: - stage_type = CIStages.TESTS_1 - if job_name not in REQUIRED_CHECKS: - stage_type = CIStages.TESTS_3 + stage_type = WorkflowStages.BUILDS_2 + elif cls.is_docs_job(job_name): + stage_type = WorkflowStages.TESTS_1 + elif cls.is_test_job(job_name): + if job_name in CI.JOB_CONFIGS: + if job_name in REQUIRED_CHECKS: + stage_type = WorkflowStages.TESTS_1 + else: + stage_type = WorkflowStages.TESTS_3 assert stage_type, f"BUG [{job_name}]" + if non_blocking_ci and stage_type == WorkflowStages.TESTS_3: + stage_type = 
WorkflowStages.TESTS_2 return stage_type - def get_job_config(self, check_name: str) -> JobConfig: - res = None - for config in ( - self.build_config, - self.builds_report_config, - self.test_configs, - self.other_jobs_configs, - ): - if check_name in config: # type: ignore - res = config[check_name].job_config # type: ignore - break - return res # type: ignore + @classmethod + def get_job_config(cls, check_name: str) -> JobConfig: + return cls.JOB_CONFIGS[check_name] - def get_runner_type(self, check_name: str) -> str: - result = None - if self.is_build_job(check_name) or check_name == JobNames.FAST_TEST: - result = Runners.BUILDER - elif any( - words in check_name.lower() - for words in [ - "install packages", - "compatibility check", - "docker", - "build check", - "jepsen", - "style check", - ] - ): - result = Runners.STYLE_CHECKER - elif check_name == JobNames.DOCS_CHECK: - # docs job is demanding - result = Runners.FUNC_TESTER_ARM - elif any( - words in check_name.lower() - for words in [ - "stateless", - "stateful", - "clickbench", - "sqllogic test", - "libfuzzer", - "bugfix validation", - ] - ): - result = Runners.FUNC_TESTER - elif any( - words in check_name.lower() - for words in ["stress", "upgrade", "integration", "performance comparison"] - ): - result = Runners.STRESS_TESTER - elif any( - words in check_name.lower() - for words in ["ast fuzzer", "unit tests", "sqlancer", "sqltest"] - ): - result = Runners.FUZZER_UNIT_TESTER + @classmethod + def get_required_build_name(cls, check_name: str) -> str: + assert check_name in cls.JOB_CONFIGS + required_builds = cls.JOB_CONFIGS[check_name].required_builds + assert required_builds and len(required_builds) == 1 + return required_builds[0] - assert result, f"BUG, no runner for [{check_name}]" - - if ( - "aarch" in check_name.lower() or "arm64" in check_name.lower() - ) and "aarch" not in result: - if result == Runners.STRESS_TESTER: - # FIXME: no arm stress tester group atm - result = Runners.FUNC_TESTER_ARM - 
elif result == Runners.BUILDER: - # crosscompile - no arm required - pass - else: - # switch to aarch64 runner - result += "-aarch64" - - return result - - def get_job_parents(self, check_name: str) -> List[str]: - res = [] - check_name = normalize_string(check_name) - - for config in ( - self.build_config, - self.builds_report_config, - self.test_configs, - self.other_jobs_configs, - ): - for job_name in config: # type: ignore - if check_name == normalize_string(job_name): - if isinstance(config[job_name], TestConfig): # type: ignore - if config[job_name].required_build: # type: ignore - res.append(config[job_name].required_build) # type: ignore - return res - - def get_digest_config(self, check_name: str) -> DigestConfig: - res = None - for config in ( - self.other_jobs_configs, - self.build_config, - self.builds_report_config, - self.test_configs, - ): - if check_name in config: # type: ignore - res = config[check_name].job_config.digest # type: ignore - assert ( - res - ), f"Invalid check_name or CI_CONFIG outdated, config not found for [{check_name}]" - return res # type: ignore + @classmethod + def get_job_parents(cls, check_name: str) -> List[str]: + return cls.JOB_CONFIGS[check_name].required_builds or [] + @classmethod def get_workflow_jobs_with_configs( - self, is_mq: bool, is_docs_only: bool, is_master: bool + cls, is_mq: bool, is_docs_only: bool, is_master: bool, is_pr: bool ) -> Dict[str, JobConfig]: """ get a list of all jobs for a workflow with configs """ jobs = [] if is_mq: - jobs = self._MQ_JOBS + jobs = MQ_JOBS elif is_docs_only: - jobs = self._DOCS_CHECK_JOBS + jobs = cls._DOCS_CHECK_JOBS else: - for config in ( - self.other_jobs_configs, - self.build_config, - self.builds_report_config, - self.test_configs, - ): - jobs += list(config) # type:ignore + # add all jobs + jobs = list(cls.JOB_CONFIGS) if is_master: - for job in self._MQ_JOBS: + for job in MQ_JOBS: jobs.remove(job) randomization_bucket_jobs = {} # type: Dict[str, Dict[str, JobConfig]] 
res = {} # type: Dict[str, JobConfig] for job in jobs: - job_config = self.get_job_config(job) + job_config = cls.JOB_CONFIGS[job] - if job_config.random_bucket: + if job_config.random_bucket and is_pr: if job_config.random_bucket not in randomization_bucket_jobs: randomization_bucket_jobs[job_config.random_bucket] = {} randomization_bucket_jobs[job_config.random_bucket][job] = job_config @@ -760,41 +631,20 @@ class CIConfig: return res - def get_builds_for_report( - self, report_name: str, release: bool = False, backport: bool = False - ) -> List[str]: - # hack to modify build list for release and bp wf - assert not (release and backport), "Invalid input" - if backport and report_name == JobNames.BUILD_CHECK: - return [ - Build.PACKAGE_RELEASE, - Build.PACKAGE_AARCH64, - Build.PACKAGE_ASAN, - Build.PACKAGE_TSAN, - Build.PACKAGE_DEBUG, - ] - if (release or backport) and report_name == JobNames.BUILD_CHECK_SPECIAL: - return [ - Build.BINARY_DARWIN, - Build.BINARY_DARWIN_AARCH64, - ] - - return self.builds_report_config[report_name].builds - @classmethod def is_build_job(cls, job: str) -> bool: - return job in Build + return job in cls.BuildNames @classmethod def is_test_job(cls, job: str) -> bool: - return not cls.is_build_job(job) and job != JobNames.STYLE_CHECK + return not cls.is_build_job(job) and job != cls.JobNames.STYLE_CHECK @classmethod def is_docs_job(cls, job: str) -> bool: return job == JobNames.DOCS_CHECK - @staticmethod - def is_required(check_name: str) -> bool: + @classmethod + def is_required(cls, check_name: str) -> bool: """Checks if a check_name is in REQUIRED_CHECKS, including batched jobs""" _BATCH_REGEXP = re.compile(r"\s+\[[0-9/]+\]$") if check_name in REQUIRED_CHECKS: @@ -803,768 +653,15 @@ class CIConfig: return check_name[: batch.start()] in REQUIRED_CHECKS return False - def validate(self) -> None: - errors = [] - for name, build_config in self.build_config.items(): - build_in_reports = False - for _, report_config in 
self.builds_report_config.items(): - if name in report_config.builds: - build_in_reports = True - break - # All build configs must belong to build_report_config - if not build_in_reports: - logging.error("Build name %s does not belong to build reports", name) - errors.append(f"Build name {name} does not belong to build reports") - # The name should be the same as build_config.name - if not build_config.name == name: - logging.error( - "Build name '%s' does not match the config 'name' value '%s'", - name, - build_config.name, - ) - errors.append( - f"Build name {name} does not match 'name' value '{build_config.name}'" - ) - # All build_report_config values should be in build_config.keys() - for build_report_name, build_report_config in self.builds_report_config.items(): - build_names = build_report_config.builds - missed_names = [ - name for name in build_names if name not in self.build_config.keys() - ] - if missed_names: - logging.error( - "The following names of the build report '%s' " - "are missed in build_config: %s", - build_report_name, - missed_names, - ) - errors.append( - f"The following names of the build report '{build_report_name}' " - f"are missed in build_config: {missed_names}", - ) - # And finally, all tests' requirements must be in the builds - for test_name, test_config in self.test_configs.items(): - if test_config.required_build not in self.build_config.keys(): - logging.error( - "The requirement '%s' for '%s' is not found in builds", - test_config, - test_name, - ) - errors.append( - f"The requirement '{test_config}' for " - f"'{test_name}' is not found in builds" - ) - - if errors: - raise KeyError("config contains errors", errors) + @classmethod + def get_build_config(cls, build_name: str) -> BuildConfig: + assert build_name in cls.JOB_CONFIGS, f"Invalid build name [{build_name}]" + res = cls.JOB_CONFIGS[build_name].build_config + assert res, f"not a build [{build_name}] or invalid JobConfig" + return res -# checks required by Mergeable 
Check -REQUIRED_CHECKS = [ - "PR Check", - StatusNames.SYNC, - JobNames.BUILD_CHECK, - JobNames.BUILD_CHECK_SPECIAL, - JobNames.DOCS_CHECK, - JobNames.FAST_TEST, - JobNames.STATEFUL_TEST_RELEASE, - JobNames.STATELESS_TEST_RELEASE, - JobNames.STATELESS_TEST_ASAN, - JobNames.STATELESS_TEST_FLAKY_ASAN, - JobNames.STATEFUL_TEST_ASAN, - JobNames.STYLE_CHECK, - JobNames.UNIT_TEST_ASAN, - JobNames.UNIT_TEST_MSAN, - JobNames.UNIT_TEST, - JobNames.UNIT_TEST_TSAN, - JobNames.UNIT_TEST_UBSAN, - JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER, - JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE, -] - -CI_CONFIG = CIConfig( - label_configs={ - CILabels.DO_NOT_TEST_LABEL: LabelConfig(run_jobs=[JobNames.STYLE_CHECK]), - CILabels.CI_SET_ARM: LabelConfig( - run_jobs=[ - JobNames.STYLE_CHECK, - Build.PACKAGE_AARCH64, - JobNames.INTEGRATION_TEST_ARM, - ] - ), - CILabels.CI_SET_REQUIRED: LabelConfig(run_jobs=REQUIRED_CHECKS), - CILabels.CI_SET_NON_REQUIRED: LabelConfig( - run_jobs=[job for job in JobNames if job not in REQUIRED_CHECKS] - ), - CILabels.CI_SET_OLD_ANALYZER: LabelConfig( - run_jobs=[ - JobNames.STYLE_CHECK, - JobNames.FAST_TEST, - Build.PACKAGE_RELEASE, - Build.PACKAGE_ASAN, - JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE, - JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER, - ] - ), - CILabels.CI_SET_SYNC: LabelConfig( - run_jobs=[ - Build.PACKAGE_ASAN, - JobNames.STYLE_CHECK, - JobNames.BUILD_CHECK, - JobNames.UNIT_TEST_ASAN, - JobNames.STATEFUL_TEST_ASAN, - ] - ), - }, - build_config={ - Build.PACKAGE_RELEASE: BuildConfig( - name=Build.PACKAGE_RELEASE, - compiler="clang-18", - package_type="deb", - static_binary_name="amd64", - additional_pkgs=True, - ), - Build.PACKAGE_AARCH64: BuildConfig( - name=Build.PACKAGE_AARCH64, - compiler="clang-18-aarch64", - package_type="deb", - static_binary_name="aarch64", - additional_pkgs=True, - ), - Build.PACKAGE_ASAN: BuildConfig( - name=Build.PACKAGE_ASAN, - compiler="clang-18", - sanitizer="address", - 
package_type="deb", - ), - Build.PACKAGE_UBSAN: BuildConfig( - name=Build.PACKAGE_UBSAN, - compiler="clang-18", - sanitizer="undefined", - package_type="deb", - ), - Build.PACKAGE_TSAN: BuildConfig( - name=Build.PACKAGE_TSAN, - compiler="clang-18", - sanitizer="thread", - package_type="deb", - ), - Build.PACKAGE_MSAN: BuildConfig( - name=Build.PACKAGE_MSAN, - compiler="clang-18", - sanitizer="memory", - package_type="deb", - ), - Build.PACKAGE_DEBUG: BuildConfig( - name=Build.PACKAGE_DEBUG, - compiler="clang-18", - debug_build=True, - package_type="deb", - sparse_checkout=True, # Check that it works with at least one build, see also update-submodules.sh - ), - Build.PACKAGE_RELEASE_COVERAGE: BuildConfig( - name=Build.PACKAGE_RELEASE_COVERAGE, - compiler="clang-18", - coverage=True, - package_type="deb", - ), - Build.BINARY_RELEASE: BuildConfig( - name=Build.BINARY_RELEASE, - compiler="clang-18", - package_type="binary", - ), - Build.BINARY_TIDY: BuildConfig( - name=Build.BINARY_TIDY, - compiler="clang-18", - debug_build=True, - package_type="binary", - static_binary_name="debug-amd64", - tidy=True, - comment="clang-tidy is used for static analysis", - ), - Build.BINARY_DARWIN: BuildConfig( - name=Build.BINARY_DARWIN, - compiler="clang-18-darwin", - package_type="binary", - static_binary_name="macos", - ), - Build.BINARY_AARCH64: BuildConfig( - name=Build.BINARY_AARCH64, - compiler="clang-18-aarch64", - package_type="binary", - ), - Build.BINARY_AARCH64_V80COMPAT: BuildConfig( - name=Build.BINARY_AARCH64_V80COMPAT, - compiler="clang-18-aarch64-v80compat", - package_type="binary", - static_binary_name="aarch64v80compat", - comment="For ARMv8.1 and older", - ), - Build.BINARY_FREEBSD: BuildConfig( - name=Build.BINARY_FREEBSD, - compiler="clang-18-freebsd", - package_type="binary", - static_binary_name="freebsd", - ), - Build.BINARY_DARWIN_AARCH64: BuildConfig( - name=Build.BINARY_DARWIN_AARCH64, - compiler="clang-18-darwin-aarch64", - package_type="binary", - 
static_binary_name="macos-aarch64", - ), - Build.BINARY_PPC64LE: BuildConfig( - name=Build.BINARY_PPC64LE, - compiler="clang-18-ppc64le", - package_type="binary", - static_binary_name="powerpc64le", - ), - Build.BINARY_AMD64_COMPAT: BuildConfig( - name=Build.BINARY_AMD64_COMPAT, - compiler="clang-18-amd64-compat", - package_type="binary", - static_binary_name="amd64compat", - comment="SSE2-only build", - ), - Build.BINARY_AMD64_MUSL: BuildConfig( - name=Build.BINARY_AMD64_MUSL, - compiler="clang-18-amd64-musl", - package_type="binary", - static_binary_name="amd64musl", - comment="Build with Musl", - ), - Build.BINARY_RISCV64: BuildConfig( - name=Build.BINARY_RISCV64, - compiler="clang-18-riscv64", - package_type="binary", - static_binary_name="riscv64", - ), - Build.BINARY_S390X: BuildConfig( - name=Build.BINARY_S390X, - compiler="clang-18-s390x", - package_type="binary", - static_binary_name="s390x", - ), - Build.BINARY_LOONGARCH64: BuildConfig( - name=Build.BINARY_LOONGARCH64, - compiler="clang-18-loongarch64", - package_type="binary", - static_binary_name="loongarch64", - ), - Build.FUZZERS: BuildConfig( - name=Build.FUZZERS, - compiler="clang-18", - package_type="fuzzers", - job_config=fuzzer_build_job_config, - ), - }, - builds_report_config={ - JobNames.BUILD_CHECK: BuildReportConfig( - builds=[ - Build.PACKAGE_RELEASE, - Build.PACKAGE_AARCH64, - Build.PACKAGE_ASAN, - Build.PACKAGE_UBSAN, - Build.PACKAGE_TSAN, - Build.PACKAGE_MSAN, - Build.PACKAGE_DEBUG, - Build.BINARY_RELEASE, - ] - ), - JobNames.BUILD_CHECK_SPECIAL: BuildReportConfig( - builds=[ - Build.BINARY_TIDY, - Build.BINARY_DARWIN, - Build.BINARY_AARCH64, - Build.BINARY_AARCH64_V80COMPAT, - Build.BINARY_FREEBSD, - Build.BINARY_DARWIN_AARCH64, - Build.BINARY_PPC64LE, - Build.BINARY_RISCV64, - Build.BINARY_S390X, - Build.BINARY_LOONGARCH64, - Build.BINARY_AMD64_COMPAT, - Build.BINARY_AMD64_MUSL, - Build.PACKAGE_RELEASE_COVERAGE, - Build.FUZZERS, - ] - ), - }, - other_jobs_configs={ - 
JobNames.DOCKER_SERVER: TestConfig("", job_config=docker_server_job_config), - JobNames.DOCKER_KEEPER: TestConfig("", job_config=docker_server_job_config), - JobNames.DOCS_CHECK: TestConfig( - "", - job_config=JobConfig( - digest=DigestConfig( - include_paths=["**/*.md", "./docs", "tests/ci/docs_check.py"], - docker=["clickhouse/docs-builder"], - ), - run_command="docs_check.py", - ), - ), - JobNames.FAST_TEST: TestConfig( - "", - job_config=JobConfig( - pr_only=True, - digest=DigestConfig( - include_paths=["./tests/queries/0_stateless/"], - exclude_files=[".md"], - docker=["clickhouse/fasttest"], - ), - timeout=2400, - ), - ), - JobNames.STYLE_CHECK: TestConfig( - "", - job_config=JobConfig( - run_always=True, - ), - ), - JobNames.BUGFIX_VALIDATE: TestConfig( - "", - # we run this check by label - no digest required - job_config=JobConfig( - run_by_label="pr-bugfix", - run_command="bugfix_validate_check.py", - timeout=900, - ), - ), - }, - test_configs={ - JobNames.INSTALL_TEST_AMD: TestConfig( - Build.PACKAGE_RELEASE, job_config=install_test_params - ), - JobNames.INSTALL_TEST_ARM: TestConfig( - Build.PACKAGE_AARCH64, job_config=install_test_params - ), - JobNames.STATEFUL_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_RELEASE: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: 
ignore - ), - JobNames.STATEFUL_TEST_RELEASE_COVERAGE: TestConfig( - Build.PACKAGE_RELEASE_COVERAGE, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_AARCH64: TestConfig( - Build.PACKAGE_AARCH64, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - # Stateful tests for parallel replicas - JobNames.STATEFUL_TEST_PARALLEL_REPL_RELEASE: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_PARALLEL_REPL_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_PARALLEL_REPL_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_PARALLEL_REPL_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_PARALLEL_REPL_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore - ), - JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore - ), - # End stateful tests for parallel replicas - JobNames.STATELESS_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=4, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, - job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, - job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_UBSAN: TestConfig( - 
Build.PACKAGE_UBSAN, - job_config=JobConfig(num_batches=2, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, - job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_RELEASE: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**stateless_test_common_params) # type: ignore - ), - JobNames.STATELESS_TEST_RELEASE_COVERAGE: TestConfig( - Build.PACKAGE_RELEASE_COVERAGE, - job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_AARCH64: TestConfig( - Build.PACKAGE_AARCH64, job_config=JobConfig(**stateless_test_common_params) # type: ignore - ), - JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: TestConfig( - Build.PACKAGE_RELEASE, - job_config=JobConfig(num_batches=4, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_S3_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, - job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore - ), - JobNames.STATELESS_TEST_AZURE_ASAN: TestConfig( - Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=4, **stateless_test_common_params, release_only=True), # type: ignore - ), - JobNames.STATELESS_TEST_S3_TSAN: TestConfig( - Build.PACKAGE_TSAN, - job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore - ), - JobNames.STRESS_TEST_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=JobConfig(**stress_test_common_params) # type: ignore - ), - JobNames.STRESS_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore - ), - JobNames.STRESS_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore - ), - JobNames.STRESS_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, 
job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore - ), - JobNames.STRESS_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore - ), - JobNames.UPGRADE_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore - ), - JobNames.STRESS_TEST_AZURE_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params, release_only=True) # type: ignore - ), - JobNames.STRESS_TEST_AZURE_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(**stress_test_common_params, release_only=True) # type: ignore - ), - JobNames.UPGRADE_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore - ), - JobNames.UPGRADE_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore - ), - JobNames.UPGRADE_TEST_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore - ), - JobNames.INTEGRATION_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=4, **integration_test_common_params, release_only=True), # type: ignore - ), - JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: TestConfig( - Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=6, **integration_test_common_params), # type: ignore - ), - JobNames.INTEGRATION_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, - job_config=JobConfig(num_batches=6, **integration_test_common_params), # type: ignore - ), - JobNames.INTEGRATION_TEST_ARM: TestConfig( - Build.PACKAGE_AARCH64, - job_config=JobConfig(num_batches=6, **integration_test_common_params), # type: ignore - ), - 
JobNames.INTEGRATION_TEST: TestConfig( - Build.PACKAGE_RELEASE, - job_config=JobConfig(num_batches=4, **integration_test_common_params, release_only=True), # type: ignore - ), - JobNames.INTEGRATION_TEST_FLAKY: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, **integration_test_common_params) # type: ignore - ), - JobNames.COMPATIBILITY_TEST: TestConfig( - Build.PACKAGE_RELEASE, - job_config=JobConfig( - required_on_release_branch=True, **compatibility_test_common_params # type: ignore - ), - ), - JobNames.COMPATIBILITY_TEST_ARM: TestConfig( - Build.PACKAGE_AARCH64, - job_config=JobConfig( - required_on_release_branch=True, **compatibility_test_common_params # type: ignore - ), - ), - JobNames.UNIT_TEST: TestConfig( - Build.BINARY_RELEASE, job_config=JobConfig(**unit_test_common_params) # type: ignore - ), - JobNames.UNIT_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(**unit_test_common_params) # type: ignore - ), - JobNames.UNIT_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(**unit_test_common_params) # type: ignore - ), - JobNames.UNIT_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(**unit_test_common_params) # type: ignore - ), - JobNames.UNIT_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(**unit_test_common_params) # type: ignore - ), - JobNames.AST_FUZZER_TEST_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore - ), - JobNames.AST_FUZZER_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore - ), - JobNames.AST_FUZZER_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore - ), - JobNames.AST_FUZZER_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore - ), - JobNames.AST_FUZZER_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, 
job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore - ), - JobNames.STATELESS_TEST_FLAKY_ASAN: TestConfig( - # replace to non-default - Build.PACKAGE_ASAN, - job_config=JobConfig(pr_only=True, **{**stateless_test_common_params, "timeout": 3600}), # type: ignore - ), - JobNames.JEPSEN_KEEPER: TestConfig( - Build.BINARY_RELEASE, - job_config=JobConfig( - run_by_label="jepsen-test", run_command="jepsen_check.py keeper" - ), - ), - JobNames.JEPSEN_SERVER: TestConfig( - Build.BINARY_RELEASE, - job_config=JobConfig( - run_by_label="jepsen-test", run_command="jepsen_check.py server" - ), - ), - JobNames.PERFORMANCE_TEST_AMD64: TestConfig( - Build.PACKAGE_RELEASE, - job_config=JobConfig(num_batches=4, **perf_test_common_params), # type: ignore - ), - JobNames.PERFORMANCE_TEST_ARM64: TestConfig( - Build.PACKAGE_AARCH64, - job_config=JobConfig(num_batches=4, run_by_label="pr-performance", **perf_test_common_params), # type: ignore - ), - JobNames.SQLANCER: TestConfig( - Build.PACKAGE_RELEASE, job_config=sqllancer_test_common_params - ), - JobNames.SQLANCER_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=sqllancer_test_common_params - ), - JobNames.SQL_LOGIC_TEST: TestConfig( - Build.PACKAGE_RELEASE, job_config=sqllogic_test_params - ), - JobNames.SQLTEST: TestConfig(Build.PACKAGE_RELEASE, job_config=sql_test_params), - JobNames.CLICKBENCH_TEST: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**clickbench_test_params) # type: ignore - ), - JobNames.CLICKBENCH_TEST_ARM: TestConfig( - Build.PACKAGE_AARCH64, job_config=JobConfig(**clickbench_test_params) # type: ignore - ), - JobNames.LIBFUZZER_TEST: TestConfig( - Build.FUZZERS, - job_config=JobConfig( - run_by_label=CILabels.libFuzzer, - timeout=10800, - run_command='libfuzzer_test_check.py "$CHECK_NAME"', - ), - ), # type: ignore - }, -) -CI_CONFIG.validate() - - -@dataclass -class CheckDescription: - name: str - description: str # the check descriptions, will be put into the status table - 
match_func: Callable[[str], bool] # the function to check vs the commit status - - def __hash__(self) -> int: - return hash(self.name + self.description) - - -CHECK_DESCRIPTIONS = [ - CheckDescription( - "PR Check", - "Checks correctness of the PR's body", - lambda x: x == "PR Check", - ), - CheckDescription( - StatusNames.SYNC, - "If it fails, ask a maintainer for help", - lambda x: x == StatusNames.SYNC, - ), - CheckDescription( - "AST fuzzer", - "Runs randomly generated queries to catch program errors. " - "The build type is optionally given in parenthesis. " - "If it fails, ask a maintainer for help", - lambda x: x.startswith("AST fuzzer"), - ), - CheckDescription( - JobNames.BUGFIX_VALIDATE, - "Checks that either a new test (functional or integration) or there " - "some changed tests that fail with the binary built on master branch", - lambda x: x == JobNames.BUGFIX_VALIDATE, - ), - CheckDescription( - "CI running", - "A meta-check that indicates the running CI. Normally, it's in success or " - "pending state. The failed status indicates some problems with the PR", - lambda x: x == "CI running", - ), - CheckDescription( - "ClickHouse build check", - "Builds ClickHouse in various configurations for use in further steps. " - "You have to fix the builds that fail. Build logs often has enough " - "information to fix the error, but you might have to reproduce the failure " - "locally. The cmake options can be found in the build log, grepping for " - 'cmake. Use these options and follow the general build process', - lambda x: x.startswith("ClickHouse") and x.endswith("build check"), - ), - CheckDescription( - "Compatibility check", - "Checks that clickhouse binary runs on distributions with old libc " - "versions. 
If it fails, ask a maintainer for help", - lambda x: x.startswith("Compatibility check"), - ), - CheckDescription( - JobNames.DOCKER_SERVER, - "The check to build and optionally push the mentioned image to docker hub", - lambda x: x.startswith("Docker server"), - ), - CheckDescription( - JobNames.DOCKER_KEEPER, - "The check to build and optionally push the mentioned image to docker hub", - lambda x: x.startswith("Docker keeper"), - ), - CheckDescription( - JobNames.DOCS_CHECK, - "Builds and tests the documentation", - lambda x: x == JobNames.DOCS_CHECK, - ), - CheckDescription( - JobNames.FAST_TEST, - "Normally this is the first check that is ran for a PR. It builds ClickHouse " - 'and runs most of stateless functional tests, ' - "omitting some. If it fails, further checks are not started until it is fixed. " - "Look at the report to see which tests fail, then reproduce the failure " - 'locally as described here', - lambda x: x == JobNames.FAST_TEST, - ), - CheckDescription( - "Flaky tests", - "Checks if new added or modified tests are flaky by running them repeatedly, " - "in parallel, with more randomization. Functional tests are run 100 times " - "with address sanitizer, and additional randomization of thread scheduling. " - "Integration tests are run up to 10 times. If at least once a new test has " - "failed, or was too long, this check will be red. We don't allow flaky tests, " - 'read the doc', - lambda x: "tests flaky check" in x, - ), - CheckDescription( - "Install packages", - "Checks that the built packages are installable in a clear environment", - lambda x: x.startswith("Install packages ("), - ), - CheckDescription( - "Integration tests", - "The integration tests report. 
In parenthesis the package type is given, " - "and in square brackets are the optional part/total tests", - lambda x: x.startswith("Integration tests ("), - ), - CheckDescription( - StatusNames.MERGEABLE, - "Checks if all other necessary checks are successful", - lambda x: x == StatusNames.MERGEABLE, - ), - CheckDescription( - "Performance Comparison", - "Measure changes in query performance. The performance test report is " - 'described in detail here. ' - "In square brackets are the optional part/total tests", - lambda x: x.startswith("Performance Comparison"), - ), - CheckDescription( - "Push to Dockerhub", - "The check for building and pushing the CI related docker images to docker hub", - lambda x: x.startswith("Push") and "to Dockerhub" in x, - ), - CheckDescription( - "Sqllogic", - "Run clickhouse on the " - 'sqllogic ' - "test set against sqlite and checks that all statements are passed", - lambda x: x.startswith("Sqllogic test"), - ), - CheckDescription( - "SQLancer", - "Fuzzing tests that detect logical bugs with " - 'SQLancer tool', - lambda x: x.startswith("SQLancer"), - ), - CheckDescription( - "Stateful tests", - "Runs stateful functional tests for ClickHouse binaries built in various " - "configurations -- release, debug, with sanitizers, etc", - lambda x: x.startswith("Stateful tests ("), - ), - CheckDescription( - "Stateless tests", - "Runs stateless functional tests for ClickHouse binaries built in various " - "configurations -- release, debug, with sanitizers, etc", - lambda x: x.startswith("Stateless tests ("), - ), - CheckDescription( - "Stress test", - "Runs stateless functional tests concurrently from several clients to detect " - "concurrency-related errors", - lambda x: x.startswith("Stress test ("), - ), - CheckDescription( - JobNames.STYLE_CHECK, - "Runs a set of checks to keep the code style clean. 
If some of tests failed, " - "see the related log from the report", - lambda x: x == JobNames.STYLE_CHECK, - ), - CheckDescription( - "Unit tests", - "Runs the unit tests for different release types", - lambda x: x.startswith("Unit tests ("), - ), - CheckDescription( - "Upgrade check", - "Runs stress tests on server version from last release and then tries to " - "upgrade it to the version from the PR. It checks if the new server can " - "successfully startup without any errors, crashes or sanitizer asserts", - lambda x: x.startswith("Upgrade check ("), - ), - CheckDescription( - "ClickBench", - "Runs [ClickBench](https://github.com/ClickHouse/ClickBench/) with instant-attach table", - lambda x: x.startswith("ClickBench"), - ), - CheckDescription( - "Fallback for unknown", - "There's no description for the check yet, please add it to " - "tests/ci/ci_config.py:CHECK_DESCRIPTIONS", - lambda x: True, - ), -] - - -def main() -> None: +if __name__ == "__main__": parser = ArgumentParser( formatter_class=ArgumentDefaultsHelpFormatter, description="The script provides build config for GITHUB_ENV or shell export", @@ -1576,10 +673,9 @@ def main() -> None: help="if set, the ENV parameters are provided for shell export", ) args = parser.parse_args() - build_config = CI_CONFIG.build_config.get(args.build_name) - if build_config: - print(build_config.export_env(args.export)) - - -if __name__ == "__main__": - main() + assert ( + args.build_name in CI.JOB_CONFIGS + ), f"Build name [{args.build_name}] is not valid" + build_config = CI.JOB_CONFIGS[args.build_name].build_config + assert build_config, "--export must not be used for non-build jobs" + print(build_config.export_env(args.export)) diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py new file mode 100644 index 00000000000..48e1280d939 --- /dev/null +++ b/tests/ci/ci_definitions.py @@ -0,0 +1,785 @@ +import copy +from dataclasses import dataclass, field +from pathlib import Path +from typing import 
Callable, List, Union, Iterable, Optional, Literal, Any + +from ci_utils import WithIter +from integration_test_images import IMAGES + + +class WorkflowStages(metaclass=WithIter): + """ + Stages of GitHUb actions workflow + """ + + # for jobs that do not belong to any stage, e.g. Build Report Check + NA = "UNKNOWN" + # normal builds (builds that required for further testing) + BUILDS_1 = "Builds_1" + # special builds + BUILDS_2 = "Builds_2" + # all tests required for merge + TESTS_1 = "Tests_1" + # not used atm + TESTS_2 = "Tests_2" + # all tests not required for merge + TESTS_3 = "Tests_3" + + +class Runners(metaclass=WithIter): + """ + GitHub runner's labels + """ + + BUILDER = "builder" + STYLE_CHECKER = "style-checker" + STYLE_CHECKER_ARM = "style-checker-aarch64" + FUNC_TESTER = "func-tester" + FUNC_TESTER_ARM = "func-tester-aarch64" + STRESS_TESTER = "stress-tester" + FUZZER_UNIT_TESTER = "fuzzer-unit-tester" + + +class Tags(metaclass=WithIter): + """ + CI Customization tags (set via PR body or some of them in GH labels, e.g. 
libFuzzer) + """ + + DO_NOT_TEST_LABEL = "do_not_test" + WOOLEN_WOLFDOG_LABEL = "woolen_wolfdog" + NO_MERGE_COMMIT = "no_merge_commit" + NO_CI_CACHE = "no_ci_cache" + # to upload all binaries from build jobs + UPLOAD_ALL_ARTIFACTS = "upload_all" + CI_SET_SYNC = "ci_set_sync" + CI_SET_ARM = "ci_set_arm" + CI_SET_REQUIRED = "ci_set_required" + CI_SET_BUILDS = "ci_set_builds" + CI_SET_NON_REQUIRED = "ci_set_non_required" + CI_SET_OLD_ANALYZER = "ci_set_old_analyzer" + + libFuzzer = "libFuzzer" + + +class BuildNames(metaclass=WithIter): + """ + Build' job names + """ + + PACKAGE_RELEASE = "package_release" + PACKAGE_AARCH64 = "package_aarch64" + PACKAGE_ASAN = "package_asan" + PACKAGE_UBSAN = "package_ubsan" + PACKAGE_TSAN = "package_tsan" + PACKAGE_MSAN = "package_msan" + PACKAGE_DEBUG = "package_debug" + PACKAGE_RELEASE_COVERAGE = "package_release_coverage" + BINARY_RELEASE = "binary_release" + BINARY_TIDY = "binary_tidy" + BINARY_DARWIN = "binary_darwin" + BINARY_AARCH64 = "binary_aarch64" + BINARY_AARCH64_V80COMPAT = "binary_aarch64_v80compat" + BINARY_FREEBSD = "binary_freebsd" + BINARY_DARWIN_AARCH64 = "binary_darwin_aarch64" + BINARY_PPC64LE = "binary_ppc64le" + BINARY_AMD64_COMPAT = "binary_amd64_compat" + BINARY_AMD64_MUSL = "binary_amd64_musl" + BINARY_RISCV64 = "binary_riscv64" + BINARY_S390X = "binary_s390x" + BINARY_LOONGARCH64 = "binary_loongarch64" + FUZZERS = "fuzzers" + + +class JobNames(metaclass=WithIter): + """ + All CI non-build jobs (Build jobs are concatenated to this list via python hack) + """ + + STYLE_CHECK = "Style check" + FAST_TEST = "Fast test" + DOCKER_SERVER = "Docker server image" + DOCKER_KEEPER = "Docker keeper image" + INSTALL_TEST_AMD = "Install packages (release)" + INSTALL_TEST_ARM = "Install packages (aarch64)" + + STATELESS_TEST_DEBUG = "Stateless tests (debug)" + STATELESS_TEST_RELEASE = "Stateless tests (release)" + STATELESS_TEST_RELEASE_COVERAGE = "Stateless tests (coverage)" + STATELESS_TEST_AARCH64 = "Stateless tests 
(aarch64)" + STATELESS_TEST_ASAN = "Stateless tests (asan)" + STATELESS_TEST_TSAN = "Stateless tests (tsan)" + STATELESS_TEST_MSAN = "Stateless tests (msan)" + STATELESS_TEST_UBSAN = "Stateless tests (ubsan)" + STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE = ( + "Stateless tests (release, old analyzer, s3, DatabaseReplicated)" + ) + STATELESS_TEST_S3_DEBUG = "Stateless tests (debug, s3 storage)" + STATELESS_TEST_S3_TSAN = "Stateless tests (tsan, s3 storage)" + STATELESS_TEST_AZURE_ASAN = "Stateless tests (azure, asan)" + STATELESS_TEST_FLAKY_ASAN = "Stateless tests flaky check (asan)" + + STATEFUL_TEST_DEBUG = "Stateful tests (debug)" + STATEFUL_TEST_RELEASE = "Stateful tests (release)" + STATEFUL_TEST_RELEASE_COVERAGE = "Stateful tests (coverage)" + STATEFUL_TEST_AARCH64 = "Stateful tests (aarch64)" + STATEFUL_TEST_ASAN = "Stateful tests (asan)" + STATEFUL_TEST_TSAN = "Stateful tests (tsan)" + STATEFUL_TEST_MSAN = "Stateful tests (msan)" + STATEFUL_TEST_UBSAN = "Stateful tests (ubsan)" + STATEFUL_TEST_PARALLEL_REPL_RELEASE = "Stateful tests (release, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_DEBUG = "Stateful tests (debug, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_ASAN = "Stateful tests (asan, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_MSAN = "Stateful tests (msan, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_UBSAN = "Stateful tests (ubsan, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_TSAN = "Stateful tests (tsan, ParallelReplicas)" + + STRESS_TEST_ASAN = "Stress test (asan)" + STRESS_TEST_TSAN = "Stress test (tsan)" + STRESS_TEST_UBSAN = "Stress test (ubsan)" + STRESS_TEST_MSAN = "Stress test (msan)" + STRESS_TEST_DEBUG = "Stress test (debug)" + STRESS_TEST_AZURE_TSAN = "Stress test (azure, tsan)" + STRESS_TEST_AZURE_MSAN = "Stress test (azure, msan)" + + INTEGRATION_TEST = "Integration tests (release)" + INTEGRATION_TEST_ASAN = "Integration tests (asan)" + INTEGRATION_TEST_ASAN_OLD_ANALYZER = "Integration tests (asan, old 
analyzer)" + INTEGRATION_TEST_TSAN = "Integration tests (tsan)" + INTEGRATION_TEST_ARM = "Integration tests (aarch64)" + INTEGRATION_TEST_FLAKY = "Integration tests flaky check (asan)" + + UPGRADE_TEST_DEBUG = "Upgrade check (debug)" + UPGRADE_TEST_ASAN = "Upgrade check (asan)" + UPGRADE_TEST_TSAN = "Upgrade check (tsan)" + UPGRADE_TEST_MSAN = "Upgrade check (msan)" + + UNIT_TEST = "Unit tests (release)" + UNIT_TEST_ASAN = "Unit tests (asan)" + UNIT_TEST_MSAN = "Unit tests (msan)" + UNIT_TEST_TSAN = "Unit tests (tsan)" + UNIT_TEST_UBSAN = "Unit tests (ubsan)" + + AST_FUZZER_TEST_DEBUG = "AST fuzzer (debug)" + AST_FUZZER_TEST_ASAN = "AST fuzzer (asan)" + AST_FUZZER_TEST_MSAN = "AST fuzzer (msan)" + AST_FUZZER_TEST_TSAN = "AST fuzzer (tsan)" + AST_FUZZER_TEST_UBSAN = "AST fuzzer (ubsan)" + + JEPSEN_KEEPER = "ClickHouse Keeper Jepsen" + JEPSEN_SERVER = "ClickHouse Server Jepsen" + + PERFORMANCE_TEST_AMD64 = "Performance Comparison (release)" + PERFORMANCE_TEST_ARM64 = "Performance Comparison (aarch64)" + + SQL_LOGIC_TEST = "Sqllogic test (release)" + + SQLANCER = "SQLancer (release)" + SQLANCER_DEBUG = "SQLancer (debug)" + SQLTEST = "SQLTest" + + COMPATIBILITY_TEST = "Compatibility check (release)" + COMPATIBILITY_TEST_ARM = "Compatibility check (aarch64)" + + CLICKBENCH_TEST = "ClickBench (release)" + CLICKBENCH_TEST_ARM = "ClickBench (aarch64)" + + LIBFUZZER_TEST = "libFuzzer tests" + + BUILD_CHECK = "Builds" + + DOCS_CHECK = "Docs check" + BUGFIX_VALIDATE = "Bugfix validation" + + +# hack to concatenate Build and non-build jobs under JobNames class +for attr_name in dir(BuildNames): + if not attr_name.startswith("__") and not callable(getattr(BuildNames, attr_name)): + setattr(JobNames, attr_name, getattr(BuildNames, attr_name)) + + +class StatusNames(metaclass=WithIter): + """ + Class with statuses that aren't related to particular jobs + """ + + # overall CI report + CI = "CI running" + # mergeable status + MERGEABLE = "Mergeable Check" + # status of a sync pr + 
SYNC = "Cloud fork sync (only for ClickHouse Inc. employees)" + # PR formatting check status + PR_CHECK = "PR Check" + + +class SyncState(metaclass=WithIter): + PENDING = "awaiting sync" + # temporary state if GH does not know mergeable state + MERGE_UNKNOWN = "unknown state (might be auto recoverable)" + # changes cannot be pushed/merged to a sync branch + PUSH_FAILED = "push failed" + MERGE_CONFLICTS = "merge conflicts" + TESTING = "awaiting test results" + TESTS_FAILED = "tests failed" + COMPLETED = "completed" + + +@dataclass +class DigestConfig: + # all files, dirs to include into digest, glob supported + include_paths: List[Union[str, Path]] = field(default_factory=list) + # file suffixes to exclude from digest + exclude_files: List[str] = field(default_factory=list) + # directories to exclude from digest + exclude_dirs: List[Union[str, Path]] = field(default_factory=list) + # docker names to include into digest + docker: List[str] = field(default_factory=list) + # git submodules digest + git_submodules: bool = False + + +@dataclass +class LabelConfig: + """ + configures different CI scenarios per CI Tag/GH label + """ + + run_jobs: Iterable[str] = frozenset() + + +@dataclass +class BuildConfig: + name: str + compiler: str + package_type: Literal["deb", "binary", "fuzzers"] + additional_pkgs: bool = False + debug_build: bool = False + coverage: bool = False + sanitizer: str = "" + tidy: bool = False + # sparse_checkout is needed only to test the option itself. + # No particular sense to use it in every build, since it slows down the job. 
+ sparse_checkout: bool = False + comment: str = "" + static_binary_name: str = "" + + def export_env(self, export: bool = False) -> str: + def process(field_name: str, field: Union[bool, str]) -> str: + if isinstance(field, bool): + field = str(field).lower() + elif not isinstance(field, str): + field = "" + if export: + return f"export BUILD_{field_name.upper()}={repr(field)}" + return f"BUILD_{field_name.upper()}={field}" + + return "\n".join(process(k, v) for k, v in self.__dict__.items()) + + +@dataclass +class JobConfig: + """ + contains config parameters for job execution in CI workflow + """ + + # GH Runner type (tag from @Runners) + runner_type: str + # used for config validation in ci unittests + job_name_keyword: str = "" + # builds required for the job (applicable for test jobs) + required_builds: Optional[List[str]] = None + # build config for the build job (applicable for builds) + build_config: Optional[BuildConfig] = None + # configures digest calculation for the job + digest: DigestConfig = field(default_factory=DigestConfig) + # will be triggered for the job if omitted in CI workflow yml + run_command: str = "" + # job timeout, seconds + timeout: Optional[int] = None + # sets number of batches for a multi-batch job + num_batches: int = 1 + # label that enables job in CI, if set digest isn't used + run_by_label: str = "" + # to run always regardless of the job digest or/and label + run_always: bool = False + # if the job needs to be run on the release branch, including master (building packages, docker server). + # NOTE: Subsequent runs on the same branch with the similar digest are still considered skip-able. + required_on_release_branch: bool = False + # job is for pr workflow only + pr_only: bool = False + # job is for release/master branches only + release_only: bool = False + # to randomly pick and run one job among jobs in the same @random_bucket (PR branches only). + random_bucket: str = "" + # Do not set it. A list of batches to run. 
It will be set in runtime in accordance with ci cache and ci settings + batches: Optional[List[int]] = None + # Do not set it. A list of batches to await. It will be set in runtime in accordance with ci cache and ci settings + pending_batches: Optional[List[int]] = None + + def with_properties(self, **kwargs: Any) -> "JobConfig": + res = copy.deepcopy(self) + for k, v in kwargs.items(): + assert hasattr(self, k), f"Setting invalid attribute [{k}]" + setattr(res, k, v) + return res + + def get_required_build(self) -> str: + assert self.required_builds + return self.required_builds[0] + + +class CommonJobConfigs: + """ + Common job configs + """ + + BUILD_REPORT = JobConfig( + job_name_keyword="builds", + run_command="build_report_check.py", + digest=DigestConfig( + include_paths=[ + "./tests/ci/build_report_check.py", + "./tests/ci/upload_result_helper.py", + ], + ), + runner_type=Runners.STYLE_CHECKER_ARM, + ) + COMPATIBILITY_TEST = JobConfig( + job_name_keyword="compatibility", + digest=DigestConfig( + include_paths=["./tests/ci/compatibility_check.py"], + docker=["clickhouse/test-old-ubuntu", "clickhouse/test-old-centos"], + ), + run_command="compatibility_check.py", + runner_type=Runners.STYLE_CHECKER, + ) + INSTALL_TEST = JobConfig( + job_name_keyword="install", + digest=DigestConfig( + include_paths=["./tests/ci/install_check.py"], + docker=["clickhouse/install-deb-test", "clickhouse/install-rpm-test"], + ), + run_command='install_check.py "$CHECK_NAME"', + runner_type=Runners.STYLE_CHECKER, + timeout=900, + ) + STATELESS_TEST = JobConfig( + job_name_keyword="stateless", + digest=DigestConfig( + include_paths=[ + "./tests/ci/functional_test_check.py", + "./tests/queries/0_stateless/", + "./tests/clickhouse-test", + "./tests/config", + "./tests/*.txt", + ], + exclude_files=[".md"], + docker=["clickhouse/stateless-test"], + ), + run_command='functional_test_check.py "$CHECK_NAME"', + runner_type=Runners.FUNC_TESTER, + timeout=10800, + ) + STATEFUL_TEST = 
JobConfig( + job_name_keyword="stateful", + digest=DigestConfig( + include_paths=[ + "./tests/ci/functional_test_check.py", + "./tests/queries/1_stateful/", + "./tests/clickhouse-test", + "./tests/config", + "./tests/*.txt", + ], + exclude_files=[".md"], + docker=["clickhouse/stateful-test"], + ), + run_command='functional_test_check.py "$CHECK_NAME"', + runner_type=Runners.FUNC_TESTER, + timeout=3600, + ) + STRESS_TEST = JobConfig( + job_name_keyword="stress", + digest=DigestConfig( + include_paths=[ + "./tests/queries/0_stateless/", + "./tests/queries/1_stateful/", + "./tests/clickhouse-test", + "./tests/config", + "./tests/*.txt", + ], + exclude_files=[".md"], + docker=["clickhouse/stress-test"], + ), + run_command="stress_check.py", + runner_type=Runners.STRESS_TESTER, + timeout=9000, + ) + UPGRADE_TEST = JobConfig( + job_name_keyword="upgrade", + digest=DigestConfig( + include_paths=["./tests/ci/upgrade_check.py"], + exclude_files=[".md"], + docker=["clickhouse/upgrade-check"], + ), + run_command="upgrade_check.py", + runner_type=Runners.STRESS_TESTER, + ) + INTEGRATION_TEST = JobConfig( + job_name_keyword="integration", + digest=DigestConfig( + include_paths=[ + "./tests/ci/integration_test_check.py", + "./tests/ci/integration_tests_runner.py", + "./tests/integration/", + ], + exclude_files=[".md"], + docker=IMAGES.copy(), + ), + run_command='integration_test_check.py "$CHECK_NAME"', + runner_type=Runners.STRESS_TESTER, + ) + ASTFUZZER_TEST = JobConfig( + job_name_keyword="ast", + digest=DigestConfig(), + run_command="ast_fuzzer_check.py", + run_always=True, + runner_type=Runners.FUZZER_UNIT_TESTER, + ) + UNIT_TEST = JobConfig( + job_name_keyword="unit", + digest=DigestConfig( + include_paths=["./tests/ci/unit_tests_check.py"], + exclude_files=[".md"], + docker=["clickhouse/unit-test"], + ), + run_command="unit_tests_check.py", + runner_type=Runners.FUZZER_UNIT_TESTER, + ) + PERF_TESTS = JobConfig( + job_name_keyword="performance", + digest=DigestConfig( + 
include_paths=[ + "./tests/ci/performance_comparison_check.py", + "./tests/performance/", + ], + exclude_files=[".md"], + docker=["clickhouse/performance-comparison"], + ), + run_command="performance_comparison_check.py", + runner_type=Runners.STRESS_TESTER, + ) + SQLLANCER_TEST = JobConfig( + job_name_keyword="lancer", + digest=DigestConfig(), + run_command="sqlancer_check.py", + release_only=True, + run_always=True, + runner_type=Runners.FUZZER_UNIT_TESTER, + ) + SQLLOGIC_TEST = JobConfig( + job_name_keyword="logic", + digest=DigestConfig( + include_paths=["./tests/ci/sqllogic_test.py"], + exclude_files=[".md"], + docker=["clickhouse/sqllogic-test"], + ), + run_command="sqllogic_test.py", + timeout=10800, + release_only=True, + runner_type=Runners.STYLE_CHECKER, + ) + SQL_TEST = JobConfig( + job_name_keyword="sqltest", + digest=DigestConfig( + include_paths=["./tests/ci/sqltest.py"], + exclude_files=[".md"], + docker=["clickhouse/sqltest"], + ), + run_command="sqltest.py", + timeout=10800, + release_only=True, + runner_type=Runners.FUZZER_UNIT_TESTER, + ) + BUGFIX_TEST = JobConfig( + job_name_keyword="bugfix", + digest=DigestConfig(), + run_command="bugfix_validate_check.py", + timeout=900, + runner_type=Runners.FUNC_TESTER, + ) + DOCKER_SERVER = JobConfig( + job_name_keyword="docker", + required_on_release_branch=True, + run_command='docker_server.py --check-name "$CHECK_NAME" --release-type head --allow-build-reuse', + digest=DigestConfig( + include_paths=[ + "tests/ci/docker_server.py", + "./docker/server", + ] + ), + runner_type=Runners.STYLE_CHECKER, + ) + CLICKBENCH_TEST = JobConfig( + job_name_keyword="clickbench", + digest=DigestConfig( + include_paths=[ + "tests/ci/clickbench.py", + ], + docker=["clickhouse/clickbench"], + ), + run_command='clickbench.py "$CHECK_NAME"', + timeout=900, + runner_type=Runners.FUNC_TESTER, + ) + BUILD = JobConfig( + required_on_release_branch=True, + digest=DigestConfig( + include_paths=[ + "./src", + "./contrib/*-cmake", + 
"./contrib/consistent-hashing", + "./contrib/murmurhash", + "./contrib/libfarmhash", + "./contrib/pdqsort", + "./contrib/cityhash102", + "./contrib/sparse-checkout", + "./contrib/libmetrohash", + "./contrib/update-submodules.sh", + "./contrib/CMakeLists.txt", + "./CMakeLists.txt", + "./PreLoad.cmake", + "./cmake", + "./base", + "./programs", + "./packages", + "./docker/packager/packager", + "./rust", + "./tests/ci/version_helper.py", + # FIXME: This is a WA to rebuild the CH and recreate the Performance.tar.zst artifact + # when there are changes in performance test scripts. + # Due to the current design of the perf test we need to rebuild CH when the performance test changes, + # otherwise the changes will not be visible in the PerformanceTest job in CI + "./tests/performance", + ], + exclude_files=[".md"], + docker=["clickhouse/binary-builder"], + git_submodules=True, + ), + run_command="build_check.py $BUILD_NAME", + runner_type=Runners.BUILDER, + ) + + +REQUIRED_CHECKS = [ + StatusNames.PR_CHECK, + StatusNames.SYNC, + JobNames.BUILD_CHECK, + JobNames.DOCS_CHECK, + JobNames.FAST_TEST, + JobNames.STATEFUL_TEST_RELEASE, + JobNames.STATELESS_TEST_RELEASE, + JobNames.STATELESS_TEST_ASAN, + JobNames.STATELESS_TEST_FLAKY_ASAN, + JobNames.STATEFUL_TEST_ASAN, + JobNames.STYLE_CHECK, + JobNames.UNIT_TEST_ASAN, + JobNames.UNIT_TEST_MSAN, + JobNames.UNIT_TEST, + JobNames.UNIT_TEST_TSAN, + JobNames.UNIT_TEST_UBSAN, + JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER, + JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE, +] + +# Jobs that run in Merge Queue if it's enabled +MQ_JOBS = [ + JobNames.STYLE_CHECK, + JobNames.FAST_TEST, + BuildNames.BINARY_RELEASE, + JobNames.UNIT_TEST, +] + + +@dataclass +class CheckDescription: + name: str + description: str # the check descriptions, will be put into the status table + match_func: Callable[[str], bool] # the function to check vs the commit status + + def __hash__(self) -> int: + return hash(self.name + self.description) + + 
+CHECK_DESCRIPTIONS = [ + CheckDescription( + StatusNames.PR_CHECK, + "Checks correctness of the PR's body", + lambda x: x == "PR Check", + ), + CheckDescription( + StatusNames.SYNC, + "If it fails, ask a maintainer for help", + lambda x: x == StatusNames.SYNC, + ), + CheckDescription( + "AST fuzzer", + "Runs randomly generated queries to catch program errors. " + "The build type is optionally given in parenthesis. " + "If it fails, ask a maintainer for help", + lambda x: x.startswith("AST fuzzer"), + ), + CheckDescription( + JobNames.BUGFIX_VALIDATE, + "Checks that either a new test (functional or integration) or there " + "some changed tests that fail with the binary built on master branch", + lambda x: x == JobNames.BUGFIX_VALIDATE, + ), + CheckDescription( + StatusNames.CI, + "A meta-check that indicates the running CI. Normally, it's in success or " + "pending state. The failed status indicates some problems with the PR", + lambda x: x == "CI running", + ), + CheckDescription( + "Builds", + "Builds ClickHouse in various configurations for use in further steps. " + "You have to fix the builds that fail. Build logs often has enough " + "information to fix the error, but you might have to reproduce the failure " + "locally. The cmake options can be found in the build log, grepping for " + 'cmake. Use these options and follow the general build process', + lambda x: x.startswith("ClickHouse") and x.endswith("build check"), + ), + CheckDescription( + "Compatibility check", + "Checks that clickhouse binary runs on distributions with old libc " + "versions. 
If it fails, ask a maintainer for help", + lambda x: x.startswith("Compatibility check"), + ), + CheckDescription( + JobNames.DOCKER_SERVER, + "The check to build and optionally push the mentioned image to docker hub", + lambda x: x.startswith("Docker server"), + ), + CheckDescription( + JobNames.DOCKER_KEEPER, + "The check to build and optionally push the mentioned image to docker hub", + lambda x: x.startswith("Docker keeper"), + ), + CheckDescription( + JobNames.DOCS_CHECK, + "Builds and tests the documentation", + lambda x: x == JobNames.DOCS_CHECK, + ), + CheckDescription( + JobNames.FAST_TEST, + "Normally this is the first check that is ran for a PR. It builds ClickHouse " + 'and runs most of stateless functional tests, ' + "omitting some. If it fails, further checks are not started until it is fixed. " + "Look at the report to see which tests fail, then reproduce the failure " + 'locally as described here', + lambda x: x == JobNames.FAST_TEST, + ), + CheckDescription( + "Flaky tests", + "Checks if new added or modified tests are flaky by running them repeatedly, " + "in parallel, with more randomization. Functional tests are run 100 times " + "with address sanitizer, and additional randomization of thread scheduling. " + "Integration tests are run up to 10 times. If at least once a new test has " + "failed, or was too long, this check will be red. We don't allow flaky tests, " + 'read the doc', + lambda x: "tests flaky check" in x, + ), + CheckDescription( + "Install packages", + "Checks that the built packages are installable in a clear environment", + lambda x: x.startswith("Install packages ("), + ), + CheckDescription( + "Integration tests", + "The integration tests report. 
In parenthesis the package type is given, " + "and in square brackets are the optional part/total tests", + lambda x: x.startswith("Integration tests ("), + ), + CheckDescription( + StatusNames.MERGEABLE, + "Checks if all other necessary checks are successful", + lambda x: x == StatusNames.MERGEABLE, + ), + CheckDescription( + "Performance Comparison", + "Measure changes in query performance. The performance test report is " + 'described in detail here. ' + "In square brackets are the optional part/total tests", + lambda x: x.startswith("Performance Comparison"), + ), + CheckDescription( + "Push to Dockerhub", + "The check for building and pushing the CI related docker images to docker hub", + lambda x: x.startswith("Push") and "to Dockerhub" in x, + ), + CheckDescription( + "Sqllogic", + "Run clickhouse on the " + 'sqllogic ' + "test set against sqlite and checks that all statements are passed", + lambda x: x.startswith("Sqllogic test"), + ), + CheckDescription( + "SQLancer", + "Fuzzing tests that detect logical bugs with " + 'SQLancer tool', + lambda x: x.startswith("SQLancer"), + ), + CheckDescription( + "Stateful tests", + "Runs stateful functional tests for ClickHouse binaries built in various " + "configurations -- release, debug, with sanitizers, etc", + lambda x: x.startswith("Stateful tests ("), + ), + CheckDescription( + "Stateless tests", + "Runs stateless functional tests for ClickHouse binaries built in various " + "configurations -- release, debug, with sanitizers, etc", + lambda x: x.startswith("Stateless tests ("), + ), + CheckDescription( + "Stress test", + "Runs stateless functional tests concurrently from several clients to detect " + "concurrency-related errors", + lambda x: x.startswith("Stress test ("), + ), + CheckDescription( + JobNames.STYLE_CHECK, + "Runs a set of checks to keep the code style clean. 
If some of tests failed, " + "see the related log from the report", + lambda x: x == JobNames.STYLE_CHECK, + ), + CheckDescription( + "Unit tests", + "Runs the unit tests for different release types", + lambda x: x.startswith("Unit tests ("), + ), + CheckDescription( + "Upgrade check", + "Runs stress tests on server version from last release and then tries to " + "upgrade it to the version from the PR. It checks if the new server can " + "successfully startup without any errors, crashes or sanitizer asserts", + lambda x: x.startswith("Upgrade check ("), + ), + CheckDescription( + "ClickBench", + "Runs [ClickBench](https://github.com/ClickHouse/ClickBench/) with instant-attach table", + lambda x: x.startswith("ClickBench"), + ), + CheckDescription( + "Fallback for unknown", + "There's no description for the check yet, please add it to " + "tests/ci/ci_config.py:CHECK_DESCRIPTIONS", + lambda x: True, + ), +] diff --git a/tests/ci/ci_settings.py b/tests/ci/ci_settings.py index 62e7826dac5..7b2dd12c310 100644 --- a/tests/ci/ci_settings.py +++ b/tests/ci/ci_settings.py @@ -3,7 +3,7 @@ from dataclasses import dataclass, asdict from typing import Optional, List, Dict, Any, Iterable from ci_utils import normalize_string -from ci_config import CILabels, CI_CONFIG, JobConfig, JobNames +from ci_config import CI from git_helper import Runner as GitRunner, GIT_PREFIX from pr_info import PRInfo @@ -29,6 +29,7 @@ class CiSettings: no_ci_cache: bool = False upload_all: bool = False no_merge_commit: bool = False + woolen_wolfdog: bool = False def as_dict(self) -> Dict[str, Any]: return asdict(self) @@ -80,7 +81,7 @@ class CiSettings: if not res.ci_jobs: res.ci_jobs = [] res.ci_jobs.append(match.removeprefix("job_")) - elif match.startswith("ci_set_") and match in CILabels: + elif match.startswith("ci_set_") and match in CI.Tags: if not res.ci_sets: res.ci_sets = [] res.ci_sets.append(match) @@ -97,17 +98,20 @@ class CiSettings: res.exclude_keywords += [ normalize_string(keyword) 
for keyword in keywords ] - elif match == CILabels.NO_CI_CACHE: + elif match == CI.Tags.NO_CI_CACHE: res.no_ci_cache = True print("NOTE: CI Cache will be disabled") - elif match == CILabels.UPLOAD_ALL_ARTIFACTS: + elif match == CI.Tags.UPLOAD_ALL_ARTIFACTS: res.upload_all = True print("NOTE: All binary artifacts will be uploaded") - elif match == CILabels.DO_NOT_TEST_LABEL: + elif match == CI.Tags.DO_NOT_TEST_LABEL: res.do_not_test = True - elif match == CILabels.NO_MERGE_COMMIT: + elif match == CI.Tags.NO_MERGE_COMMIT: res.no_merge_commit = True print("NOTE: Merge Commit will be disabled") + elif match == CI.Tags.WOOLEN_WOLFDOG_LABEL: + res.woolen_wolfdog = True + print("NOTE: Woolen Wolfdog mode enabled") elif match.startswith("batch_"): batches = [] try: @@ -131,17 +135,18 @@ class CiSettings: def _check_if_selected( self, job: str, - job_config: JobConfig, + job_config: CI.JobConfig, is_release: bool, is_pr: bool, + is_mq: bool, labels: Iterable[str], ) -> bool: # type: ignore #too-many-return-statements if self.do_not_test: - label_config = CI_CONFIG.get_label_config(CILabels.DO_NOT_TEST_LABEL) - assert label_config, f"Unknown tag [{CILabels.DO_NOT_TEST_LABEL}]" + label_config = CI.get_tag_config(CI.Tags.DO_NOT_TEST_LABEL) + assert label_config, f"Unknown tag [{CI.Tags.DO_NOT_TEST_LABEL}]" if job in label_config.run_jobs: print( - f"Job [{job}] present in CI set [{CILabels.DO_NOT_TEST_LABEL}] - pass" + f"Job [{job}] present in CI set [{CI.Tags.DO_NOT_TEST_LABEL}] - pass" ) return True return False @@ -163,7 +168,7 @@ class CiSettings: to_deny = False if self.include_keywords: - if job == JobNames.STYLE_CHECK: + if job == CI.JobNames.STYLE_CHECK: # never exclude Style Check by include keywords return True for keyword in self.include_keywords: @@ -174,7 +179,7 @@ class CiSettings: if self.ci_sets: for tag in self.ci_sets: - label_config = CI_CONFIG.get_label_config(tag) + label_config = CI.get_tag_config(tag) assert label_config, f"Unknown tag [{tag}]" if job in 
label_config.run_jobs: print(f"Job [{job}] present in CI set [{tag}] - pass") @@ -189,34 +194,43 @@ class CiSettings: if job_config.release_only and not is_release: return False - elif job_config.pr_only and not is_pr: + elif job_config.pr_only and not is_pr and not is_mq: return False return not to_deny def apply( self, - job_configs: Dict[str, JobConfig], + job_configs: Dict[str, CI.JobConfig], is_release: bool, is_pr: bool, + is_mq: bool, labels: Iterable[str], - ) -> Dict[str, JobConfig]: + ) -> Dict[str, CI.JobConfig]: """ Apply CI settings from pr body """ res = {} for job, job_config in job_configs.items(): if self._check_if_selected( - job, job_config, is_release=is_release, is_pr=is_pr, labels=labels + job, + job_config, + is_release=is_release, + is_pr=is_pr, + is_mq=is_mq, + labels=labels, ): res[job] = job_config + add_parents = [] for job in list(res): - parent_jobs = CI_CONFIG.get_job_parents(job) + parent_jobs = CI.get_job_parents(job) for parent_job in parent_jobs: if parent_job not in res: + add_parents.append(parent_job) print(f"Job [{job}] requires [{parent_job}] - add") - res[parent_job] = job_configs[parent_job] + for job in add_parents: + res[job] = job_configs[job] for job, job_config in res.items(): batches = [] diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index a0d6495452f..fdc9c002b66 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -17,9 +17,8 @@ from github.GithubObject import NotSet from github.IssueComment import IssueComment from github.Repository import Repository -from ci_config import CHECK_DESCRIPTIONS, CheckDescription, StatusNames, CIConfig -from env_helper import GITHUB_REPOSITORY, GITHUB_UPSTREAM_REPOSITORY, TEMP_PATH -from lambda_shared_package.lambda_shared.pr import Labels +from ci_config import CI +from env_helper import GITHUB_REPOSITORY, TEMP_PATH from pr_info import PRInfo from report import ( ERROR, @@ -29,7 +28,6 @@ from report import ( 
StatusType, TestResult, TestResults, - get_status, get_worst_status, ) from s3_helper import S3Helper @@ -103,7 +101,12 @@ def post_commit_status( if i == RETRY - 1: raise ex time.sleep(i) - if pr_info: + if pr_info and check_name not in ( + CI.StatusNames.MERGEABLE, + CI.StatusNames.CI, + CI.StatusNames.PR_CHECK, + CI.StatusNames.SYNC, + ): status_updated = False for i in range(RETRY): try: @@ -157,10 +160,21 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> None: gh.__requester = commit._requester # type:ignore #pylint:disable=protected-access repo = get_repo(gh) statuses = sorted(get_commit_filtered_statuses(commit), key=lambda x: x.context) + statuses = [ + status + for status in statuses + if status.context + not in ( + CI.StatusNames.MERGEABLE, + CI.StatusNames.CI, + CI.StatusNames.PR_CHECK, + CI.StatusNames.SYNC, + ) + ] if not statuses: return - if not [status for status in statuses if status.context == StatusNames.CI]: + if not [status for status in statuses if status.context == CI.StatusNames.CI]: # This is the case, when some statuses already exist for the check, # but not the StatusNames.CI. We should create it as pending. 
# W/o pr_info to avoid recursion, and yes, one extra create_ci_report @@ -169,7 +183,7 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> None: PENDING, create_ci_report(pr_info, statuses), "The report for running CI", - StatusNames.CI, + CI.StatusNames.CI, ) # We update the report in generate_status_comment function, so do it each @@ -212,20 +226,20 @@ def generate_status_comment(pr_info: PRInfo, statuses: CommitStatuses) -> str: f"\n" ) # group checks by the name to get the worst one per each - grouped_statuses = {} # type: Dict[CheckDescription, CommitStatuses] + grouped_statuses = {} # type: Dict[CI.CheckDescription, CommitStatuses] for status in statuses: cd = None - for c in CHECK_DESCRIPTIONS: + for c in CI.CHECK_DESCRIPTIONS: if c.match_func(status.context): cd = c break - if cd is None or cd == CHECK_DESCRIPTIONS[-1]: + if cd is None or cd == CI.CHECK_DESCRIPTIONS[-1]: # This is the case for either non-found description or a fallback - cd = CheckDescription( + cd = CI.CheckDescription( status.context, - CHECK_DESCRIPTIONS[-1].description, - CHECK_DESCRIPTIONS[-1].match_func, + CI.CHECK_DESCRIPTIONS[-1].description, + CI.CHECK_DESCRIPTIONS[-1].match_func, ) if cd in grouped_statuses: @@ -301,7 +315,7 @@ def create_ci_report(pr_info: PRInfo, statuses: CommitStatuses) -> str: ) ) return upload_results( - S3Helper(), pr_info.number, pr_info.sha, test_results, [], StatusNames.CI + S3Helper(), pr_info.number, pr_info.sha, test_results, [], CI.StatusNames.CI ) @@ -435,43 +449,22 @@ def set_mergeable_check( state, report_url, format_description(description), - StatusNames.MERGEABLE, + CI.StatusNames.MERGEABLE, ) -def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> None: - "check if the check_name in REQUIRED_CHECKS and then trigger update" - not_run = ( - pr_info.labels.intersection({Labels.SKIP_MERGEABLE_CHECK, Labels.RELEASE}) - or not CIConfig.is_required(check_name) - or pr_info.release_pr - or pr_info.number == 0 - ) - - 
if not_run: - # Let's avoid unnecessary work - return - - logging.info("Update Mergeable Check by %s", check_name) - - statuses = get_commit_filtered_statuses(commit) - trigger_mergeable_check(commit, statuses) - - def trigger_mergeable_check( commit: Commit, statuses: CommitStatuses, - set_if_green: bool = False, + set_from_sync: bool = False, workflow_failed: bool = False, ) -> StatusType: """calculate and update StatusNames.MERGEABLE""" - required_checks = [ - status for status in statuses if CIConfig.is_required(status.context) - ] + required_checks = [status for status in statuses if CI.is_required(status.context)] mergeable_status = None for status in statuses: - if status.context == StatusNames.MERGEABLE: + if status.context == CI.StatusNames.MERGEABLE: mergeable_status = status break @@ -503,63 +496,43 @@ def trigger_mergeable_check( description = format_description(description) - if not set_if_green and state == SUCCESS: - # do not set green Mergeable Check status - pass - else: - if mergeable_status is None or mergeable_status.description != description: + if set_from_sync: + # update Mergeable Check from sync WF only if its status already present or its new status is not SUCCESS + # to avoid false-positives + if mergeable_status or state != SUCCESS: set_mergeable_check(commit, description, state) + elif mergeable_status is None or mergeable_status.description != description: + set_mergeable_check(commit, description, state) return state def update_upstream_sync_status( - upstream_pr_number: int, - sync_pr_number: int, - gh: Github, + pr_info: PRInfo, state: StatusType, - can_set_green_mergeable_status: bool = False, ) -> None: - upstream_repo = gh.get_repo(GITHUB_UPSTREAM_REPOSITORY) - upstream_pr = upstream_repo.get_pull(upstream_pr_number) - sync_repo = gh.get_repo(GITHUB_REPOSITORY) - sync_pr = sync_repo.get_pull(sync_pr_number) - # Find the commit that is in both repos, upstream and cloud - sync_commits = sync_pr.get_commits().reversed - 
upstream_commits = upstream_pr.get_commits().reversed - # Github objects are compared by _url attribute. We can't compare them directly and - # should compare commits by SHA1 - upstream_shas = [c.sha for c in upstream_commits] - logging.info("Commits in upstream PR:\n %s", ", ".join(upstream_shas)) - sync_shas = [c.sha for c in sync_commits] - logging.info("Commits in sync PR:\n %s", ", ".join(reversed(sync_shas))) + last_synced_upstream_commit = pr_info.get_latest_sync_commit() - # find latest synced commit - last_synced_upstream_commit = None - for commit in upstream_commits: - if commit.sha in sync_shas: - last_synced_upstream_commit = commit - break - - assert last_synced_upstream_commit - - sync_status = get_status(state) logging.info( - "Using commit %s to post the %s status `%s`: [%s]", + "Using commit [%s] to post the [%s] status [%s]", last_synced_upstream_commit.sha, - sync_status, - StatusNames.SYNC, - "", + state, + CI.StatusNames.SYNC, ) + if state == SUCCESS: + description = CI.SyncState.COMPLETED + else: + description = CI.SyncState.TESTS_FAILED + post_commit_status( last_synced_upstream_commit, - sync_status, + state, "", - "", - StatusNames.SYNC, + description, + CI.StatusNames.SYNC, ) trigger_mergeable_check( last_synced_upstream_commit, get_commit_filtered_statuses(last_synced_upstream_commit), - set_if_green=can_set_green_mergeable_status, + set_from_sync=True, ) diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index e7fee827320..bb0c717160e 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -196,7 +196,7 @@ def main(): # See https://sourceware.org/glibc/wiki/Glibc%20Timeline max_glibc_version = "" - if "amd64" in check_name: + if "amd64" in check_name or "release" in check_name: max_glibc_version = "2.4" elif "aarch64" in check_name: max_glibc_version = "2.18" # because of build with newer sysroot? 
diff --git a/tests/ci/digest_helper.py b/tests/ci/digest_helper.py index 8d6ec127f6e..4dcfb03c04f 100644 --- a/tests/ci/digest_helper.py +++ b/tests/ci/digest_helper.py @@ -9,10 +9,10 @@ from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Union from sys import modules from docker_images_helper import get_images_info -from ci_config import DigestConfig from git_helper import Runner from env_helper import ROOT_DIR from ci_utils import cd +from ci_config import CI DOCKER_DIGEST_LEN = 12 JOB_DIGEST_LEN = 10 @@ -139,20 +139,21 @@ class DockerDigester: class JobDigester: - def __init__(self): + def __init__(self, dry_run: bool = False): self.dd = DockerDigester() self.cache: Dict[str, str] = {} + self.dry_run = dry_run @staticmethod - def _get_config_hash(digest_config: DigestConfig) -> str: + def _get_config_hash(digest_config: CI.DigestConfig) -> str: data_dict = asdict(digest_config) hash_obj = md5() hash_obj.update(str(data_dict).encode()) hash_string = hash_obj.hexdigest() return hash_string - def get_job_digest(self, digest_config: DigestConfig) -> str: - if not digest_config.include_paths: + def get_job_digest(self, digest_config: CI.DigestConfig) -> str: + if not digest_config.include_paths or self.dry_run: # job is not for digest return "f" * JOB_DIGEST_LEN diff --git a/tests/ci/download_binary.py b/tests/ci/download_binary.py index 79db1e57d62..b0b5659ca83 100755 --- a/tests/ci/download_binary.py +++ b/tests/ci/download_binary.py @@ -8,7 +8,7 @@ import logging from pathlib import Path from build_download_helper import download_build_with_progress -from ci_config import CI_CONFIG +from ci_config import CI from env_helper import RUNNER_TEMP, S3_ARTIFACT_DOWNLOAD_TEMPLATE from git_helper import Git, commit from version_helper import get_version_from_repo, version_arg @@ -59,7 +59,8 @@ def main(): temp_path.mkdir(parents=True, exist_ok=True) for build in args.build_names: # check if it's in CI_CONFIG - config = CI_CONFIG.build_config[build] + config = 
CI.JOB_CONFIGS[build].build_config + assert config if args.rename and config.static_binary_name: path = temp_path / f"clickhouse-{config.static_binary_name}" else: diff --git a/tests/ci/download_release_packages.py b/tests/ci/download_release_packages.py index 1ba4ff8ff2e..8f3a2190ae8 100755 --- a/tests/ci/download_release_packages.py +++ b/tests/ci/download_release_packages.py @@ -1,79 +1,38 @@ #!/usr/bin/env python3 import logging -import os +from pathlib import Path -import requests -from requests.adapters import HTTPAdapter # type: ignore -from urllib3.util.retry import Retry # type: ignore - -from get_previous_release_tag import ReleaseInfo, get_previous_release - -CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" - -DOWNLOAD_PREFIX = ( - "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/" +from build_download_helper import DownloadException, download_build_with_progress +from get_previous_release_tag import ( + ReleaseInfo, + get_previous_release, + get_release_by_tag, ) -CLICKHOUSE_COMMON_STATIC_PACKAGE_NAME = "clickhouse-common-static_{version}_amd64.deb" -CLICKHOUSE_COMMON_STATIC_DBG_PACKAGE_NAME = ( - "clickhouse-common-static-dbg_{version}_amd64.deb" -) -CLICKHOUSE_CLIENT_PACKAGE_NAME = "clickhouse-client_{version}_amd64.deb" -CLICKHOUSE_LIBRARY_BRIDGE_PACKAGE_NAME = "clickhouse-library-bridge_{version}_amd64.deb" -CLICKHOUSE_ODBC_BRIDGE_PACKAGE_NAME = "clickhouse-odbc-bridge_{version}_amd64.deb" -CLICKHOUSE_SERVER_PACKAGE_NAME = "clickhouse-server_{version}_amd64.deb" -PACKAGES_DIR = "previous_release_package_folder/" -VERSION_PATTERN = r"((?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" +PACKAGES_DIR = Path("previous_release_package_folder") -def download_package(url, out_path, retries=10, backoff_factor=0.3): - session = requests.Session() - retry = Retry( - total=retries, - read=retries, - connect=retries, - backoff_factor=backoff_factor, - status_forcelist=[500, 502, 503, 504], - ) - adapter = 
HTTPAdapter(max_retries=retry) - session.mount("http://", adapter) - session.mount("https://", adapter) - response = session.get(url) - response.raise_for_status() - print(f"Download {url} to {out_path}") - with open(out_path, "wb") as fd: - fd.write(response.content) - - -def download_packages(release, dest_path=PACKAGES_DIR): - if not os.path.exists(dest_path): - os.makedirs(dest_path) +def download_packages(release: ReleaseInfo, dest_path: Path = PACKAGES_DIR) -> None: + dest_path.mkdir(parents=True, exist_ok=True) logging.info("Will download %s", release) - def get_dest_path(pkg_name): - return os.path.join(dest_path, pkg_name) - - for pkg in ( - CLICKHOUSE_COMMON_STATIC_PACKAGE_NAME, - CLICKHOUSE_COMMON_STATIC_DBG_PACKAGE_NAME, - CLICKHOUSE_CLIENT_PACKAGE_NAME, - CLICKHOUSE_LIBRARY_BRIDGE_PACKAGE_NAME, - CLICKHOUSE_ODBC_BRIDGE_PACKAGE_NAME, - CLICKHOUSE_SERVER_PACKAGE_NAME, - ): - url = (DOWNLOAD_PREFIX + pkg).format(version=release.version, type=release.type) - pkg_name = get_dest_path(pkg.format(version=release.version)) - download_package(url, pkg_name) + for pkg, url in release.assets.items(): + if not pkg.endswith("_amd64.deb") or "-dbg_" in pkg: + continue + pkg_name = dest_path / pkg + download_build_with_progress(url, pkg_name) -def download_last_release(dest_path): +def download_last_release(dest_path: Path) -> None: current_release = get_previous_release(None) + if current_release is None: + raise DownloadException("The current release is not found") download_packages(current_release, dest_path=dest_path) if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - release = ReleaseInfo(input()) + release = get_release_by_tag(input()) download_packages(release) diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index 36732bd7c9f..5217e4035da 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -9,8 +9,9 @@ from build_download_helper import APIException, get_gh_api module_dir = p.abspath(p.dirname(__file__)) git_root = 
p.abspath(p.join(module_dir, "..", "..")) + ROOT_DIR = git_root -CI = bool(os.getenv("CI")) +IS_CI = bool(os.getenv("CI")) TEMP_PATH = os.getenv("TEMP_PATH", p.abspath(p.join(module_dir, "./tmp"))) REPORT_PATH = f"{TEMP_PATH}/reports" # FIXME: latest should not be used in CI, set temporary for transition to "docker with digest as a tag" diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 12756599865..385caccc8cd 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -1,103 +1,92 @@ #!/usr/bin/env python3 +import argparse import logging -import sys from github import Github -from ci_config import StatusNames +from ci_config import CI from commit_status_helper import ( get_commit, get_commit_filtered_statuses, post_commit_status, - set_mergeable_check, - trigger_mergeable_check, - update_upstream_sync_status, ) -from env_helper import GITHUB_REPOSITORY, GITHUB_UPSTREAM_REPOSITORY from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import FAILURE, PENDING, SUCCESS, StatusType -from synchronizer_utils import SYNC_BRANCH_PREFIX + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Script to merge the given PR. 
Additional checks for approved " + "status and green commit statuses could be done", + ) + parser.add_argument( + "--wf-status", + type=str, + default="", + help="overall workflow status [success|failure]", + ) + return parser.parse_args() def main(): logging.basicConfig(level=logging.INFO) + args = parse_args() - has_failure = False - - # FIXME: temporary hack to fail Mergeable Check in MQ if pipeline has any failed jobs - if len(sys.argv) > 1 and sys.argv[1] == "--pipeline-failure": - has_failure = True + has_workflow_failures = args.wf_status == FAILURE pr_info = PRInfo(need_orgs=True) gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - statuses = None - if pr_info.is_merge_queue: - # in MQ Mergeable check status must never be green if any failures in workflow - if has_failure: - set_mergeable_check(commit, "workflow failed", "failure") - else: - # This must be the only place where green MCheck is set in the MQ (in the end of CI) to avoid early merge - set_mergeable_check(commit, "workflow passed", "success") - else: - statuses = get_commit_filtered_statuses(commit) - state = trigger_mergeable_check(commit, statuses, set_if_green=True) + statuses = get_commit_filtered_statuses(commit) - # Process upstream StatusNames.SYNC - if ( - pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") - and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY + ci_running_statuses = [s for s in statuses if s.context == CI.StatusNames.CI] + if not ci_running_statuses: + return + # Take the latest status + ci_status = ci_running_statuses[-1] + + has_failure = False + has_pending = False + error_cnt = 0 + for status in statuses: + if status.context in ( + CI.StatusNames.MERGEABLE, + CI.StatusNames.CI, + CI.StatusNames.SYNC, ): - upstream_pr_number = int(pr_info.head_ref.split("/pr/", maxsplit=1)[1]) - update_upstream_sync_status( - upstream_pr_number, - pr_info.number, - gh, - state, - can_set_green_mergeable_status=True, - ) + # do not account 
these statuses + continue + if status.state == PENDING: + has_pending = True + elif status.state != SUCCESS: + has_failure = True + error_cnt += 1 - ci_running_statuses = [s for s in statuses if s.context == StatusNames.CI] - if not ci_running_statuses: - return - # Take the latest status - ci_status = ci_running_statuses[-1] + ci_state = SUCCESS # type: StatusType + description = "All checks finished" + if has_failure: + ci_state = FAILURE + description = f"All checks finished. {error_cnt} jobs failed" + elif has_workflow_failures: + ci_state = FAILURE + description = "All checks finished. Workflow has failures." + elif has_pending: + print("ERROR: CI must not have pending jobs by the time of finish check") + description = "ERROR: workflow has pending jobs" + ci_state = FAILURE - has_failure = False - has_pending = False - for status in statuses: - if status.context in (StatusNames.MERGEABLE, StatusNames.CI): - # do not account these statuses - continue - if status.state == PENDING: - if status.context == StatusNames.SYNC: - # do not account sync status if pending - it's a different WF - continue - has_pending = True - elif status.state == SUCCESS: - continue - else: - has_failure = True - - ci_state = SUCCESS # type: StatusType - if has_failure: - ci_state = FAILURE - elif has_pending: - print("ERROR: CI must not have pending jobs by the time of finish check") - ci_state = FAILURE - - if ci_status.state == PENDING: - post_commit_status( - commit, - ci_state, - ci_status.target_url, - "All checks finished", - StatusNames.CI, - pr_info, - dump_to_file=True, - ) + post_commit_status( + commit, + ci_state, + ci_status.target_url, + description, + CI.StatusNames.CI, + pr_info, + dump_to_file=True, + ) if __name__ == "__main__": diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 5bb46d7ec2f..ee459ce35a0 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -122,6 +122,10 @@ def 
_get_statless_tests_to_run(pr_info: PRInfo) -> List[str]: for fpath in pr_info.changed_files: if re.match(r"tests/queries/0_stateless/[0-9]{5}", fpath): + path_ = Path(REPO_COPY + "/" + fpath) + if not path_.exists(): + logging.info("File '%s' is removed - skip", fpath) + continue logging.info("File '%s' is changed and seems like a test", fpath) fname = fpath.split("/")[3] fname_without_ext = os.path.splitext(fname)[0] @@ -309,9 +313,6 @@ def main(): state, description, test_results, additional_logs = process_results( result_path, server_log_path ) - # FIXME (alesapin) - if "azure" in check_name: - state = "success" else: print( "This is validate bugfix or flaky check run, but no changes test to run - skip with success" diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index bc0cb975ef5..2b4d09aa326 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -2,47 +2,37 @@ import logging import re -from typing import List, Optional, Tuple +from typing import Dict, List, Optional, Tuple -import requests - -CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" -CLICKHOUSE_PACKAGE_URL = ( - "https://github.com/ClickHouse/ClickHouse/releases/download/" - "v{version}-{type}/clickhouse-common-static_{version}_amd64.deb" +from build_download_helper import get_gh_api +from git_helper import TAG_REGEXP +from version_helper import ( + ClickHouseVersion, + get_version_from_string, + get_version_from_tag, ) -VERSION_PATTERN = r"(v(?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" + +CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/releases" +PACKAGE_REGEXP = r"\Aclickhouse-common-static_.+[.]deb" logger = logging.getLogger(__name__) -class Version: - def __init__(self, version: str): - self.version = version - - def __lt__(self, other: "Version") -> bool: - return list(map(int, self.version.split("."))) < list( - map(int, other.version.split(".")) - ) - - 
def __str__(self): - return self.version - - class ReleaseInfo: - def __init__(self, release_tag: str): - self.version = Version(release_tag[1:].split("-")[0]) - self.type = release_tag[1:].split("-")[1] + def __init__(self, release_tag: str, assets: Dict[str, str]): + self.version = get_version_from_tag(release_tag) + self.type = self.version.description + self.assets = assets def __str__(self): - return f"v{self.version}-{self.type}" + return self.version.describe def __repr__(self): - return f"ReleaseInfo: {self.version}-{self.type}" + return f"ReleaseInfo: {self.version.describe}" def find_previous_release( - server_version: Optional[Version], releases: List[ReleaseInfo] + server_version: Optional[ClickHouseVersion], releases: List[ReleaseInfo] ) -> Tuple[bool, Optional[ReleaseInfo]]: releases.sort(key=lambda x: x.version, reverse=True) @@ -54,15 +44,7 @@ def find_previous_release( # Check if the artifact exists on GitHub. # It can be not true for a short period of time # after creating a tag for a new release before uploading the packages. 
- if ( - requests.head( - CLICKHOUSE_PACKAGE_URL.format( - version=release.version, type=release.type - ), - timeout=10, - ).status_code - != 404 - ): + if any(re.match(PACKAGE_REGEXP, name) for name in release.assets.keys()): return True, release logger.debug( @@ -74,12 +56,14 @@ def find_previous_release( return False, None -def get_previous_release(server_version: Optional[Version]) -> Optional[ReleaseInfo]: +def get_previous_release( + server_version: Optional[ClickHouseVersion], +) -> Optional[ReleaseInfo]: page = 1 found = False while not found: - response = requests.get( - CLICKHOUSE_TAGS_URL, {"page": page, "per_page": 100}, timeout=10 + response = get_gh_api( + CLICKHOUSE_TAGS_URL, params={"page": page, "per_page": 100}, timeout=10 ) if not response.ok: logger.error( @@ -87,24 +71,42 @@ def get_previous_release(server_version: Optional[Version]) -> Optional[ReleaseI ) response.raise_for_status() - releases_str = set(re.findall(VERSION_PATTERN, response.text)) - if len(releases_str) == 0: - raise ValueError( - "Cannot find previous release for " - + str(server_version) - + " server version" - ) + releases = response.json() - releases = [ReleaseInfo(release) for release in releases_str] - found, previous_release = find_previous_release(server_version, releases) + release_infos = [] # type: List[ReleaseInfo] + for r in releases: + if re.match(TAG_REGEXP, r["tag_name"]): + assets = { + a["name"]: a["browser_download_url"] + for a in r["assets"] + if a["state"] == "uploaded" + } + release_infos.append(ReleaseInfo(r["tag_name"], assets)) + found, previous_release = find_previous_release(server_version, release_infos) page += 1 return previous_release +def get_release_by_tag(tag: str) -> ReleaseInfo: + response = get_gh_api(f"{CLICKHOUSE_TAGS_URL}/tags/{tag}", timeout=10) + release = response.json() + assets = { + a["name"]: a["browser_download_url"] + for a in release["assets"] + if a["state"] == "uploaded" + } + return ReleaseInfo(release["tag_name"], assets) + 
+ def main(): logging.basicConfig(level=logging.INFO) - server_version = Version(input()) + version_string = input() + version_string = version_string.split("+", maxsplit=1)[0] + try: + server_version = get_version_from_string(version_string) + except ValueError: + server_version = get_version_from_tag(version_string) print(get_previous_release(server_version)) diff --git a/tests/ci/git_helper.py b/tests/ci/git_helper.py index 8ec90dd7b2d..6b66bc44d10 100644 --- a/tests/ci/git_helper.py +++ b/tests/ci/git_helper.py @@ -7,7 +7,7 @@ import os.path as p import re import subprocess import tempfile -from typing import Any, List, Optional +from typing import Any, List, Literal, Optional logger = logging.getLogger(__name__) @@ -15,7 +15,9 @@ logger = logging.getLogger(__name__) # \A and \Z match only start and end of the whole string RELEASE_BRANCH_REGEXP = r"\A\d+[.]\d+\Z" TAG_REGEXP = ( - r"\Av\d{2}[.][1-9]\d*[.][1-9]\d*[.][1-9]\d*-(testing|prestable|stable|lts)\Z" + r"\Av\d{2}" # First two digits of major part + r"([.][1-9]\d*){3}" # minor.patch.tweak parts + r"-(new|testing|prestable|stable|lts)\Z" # suffix with a version type ) SHA_REGEXP = re.compile(r"\A([0-9]|[a-f]){40}\Z") @@ -122,17 +124,35 @@ class Git: _tag_pattern = re.compile(TAG_REGEXP) def __init__(self, ignore_no_tags: bool = False): + """ + new_tag is used for special v24.1.1.1-new tags where the previous version is moved to the release branch + * 66666666666 Some another commit with version 24.8.1.xxxxx-testing, tweak is counted from new_tag = v24.8.1.1-new + | * 55555555555 (tag: v24.7.1.123123123-stable, branch: 24.7) tweak counted from new_tag = v24.7.1.1-new + |/ + * 44444444444 (tag: v24.8.1.1-new) + | * 33333333333 (tag: v24.6.1.123123123-stable, branch: 24.6) tweak counted from new_tag = v24.6.1.1-new + |/ + * 22222222222 (tag: v24.7.1.1-new) + | * 11111111111 (tag: v24.5.1.123123123-stable, branch: 24.5) tweak counted from new_tag = v24.4.1.2088-stable + |/ + * 00000000000 (tag: v24.6.1.1-new) + 
* 6d4b31322d1 (tag: v24.4.1.2088-stable) + * 2c5c589a882 (tag: v24.3.1.2672-lts) + * 891689a4150 (tag: v24.2.1.2248-stable) + * 5a024dfc093 (tag: v24.1.1.2048-stable) + * a2faa65b080 (tag: v23.12.1.1368-stable) + * 05bc8ef1e02 (tag: v23.11.1.2711-stable) + """ self.root = git_runner.cwd self._ignore_no_tags = ignore_no_tags self.run = git_runner.run self.latest_tag = "" self.new_tag = "" - self.new_branch = "" self.branch = "" self.sha = "" self.sha_short = "" - self.description = "shallow-checkout" - self.commits_since_tag = 0 + self.commits_since_latest = 0 + self.commits_since_new = 0 self.update() def update(self): @@ -155,10 +175,20 @@ class Git: stderr = subprocess.DEVNULL if suppress_stderr else None self.latest_tag = self.run("git describe --tags --abbrev=0", stderr=stderr) # Format should be: {latest_tag}-{commits_since_tag}-g{sha_short} - self.description = self.run("git describe --tags --long") - self.commits_since_tag = int( + self.commits_since_latest = int( self.run(f"git rev-list {self.latest_tag}..HEAD --count") ) + if self.latest_tag.endswith("-new"): + # We won't change the behaviour of the the "latest_tag" + # So here we set "new_tag" to the previous tag in the graph, that will allow + # getting alternative "tweak" + self.new_tag = self.run( + f"git describe --tags --abbrev=0 --exclude='{self.latest_tag}'", + stderr=stderr, + ) + self.commits_since_new = int( + self.run(f"git rev-list {self.new_tag}..HEAD --count") + ) @staticmethod def check_tag(value: str) -> None: @@ -187,19 +217,34 @@ class Git: @property def tweak(self) -> int: - if not self.latest_tag.endswith("-testing"): + return self._tweak("latest") + + @property + def tweak_to_new(self) -> int: + return self._tweak("new") + + def _tweak(self, tag_type: Literal["latest", "new"]) -> int: + """Accepts latest or new as a tag_type and returns the tweak number to it""" + if tag_type == "latest": + commits = self.commits_since_latest + tag = self.latest_tag + else: + commits = 
self.commits_since_new + tag = self.new_tag + + if not tag.endswith("-testing"): # When we are on the tag, we still need to have tweak=1 to not # break cmake with versions like 12.13.14.0 - if not self.commits_since_tag: - # We are in a tagged commit. The tweak should match the - # current version's value - version = self.latest_tag.split("-", maxsplit=1)[0] - try: - return int(version.split(".")[-1]) - except ValueError: - # There are no tags, or a wrong tag. Return default - return TWEAK - return self.commits_since_tag + if commits: + return commits + # We are in a tagged commit or shallow checkout. The tweak should match the + # current version's value + version = tag.split("-", maxsplit=1)[0] + try: + return int(version.split(".")[-1]) + except ValueError: + # There are no tags (shallow checkout), or a wrong tag. Return default + return TWEAK - version = self.latest_tag.split("-", maxsplit=1)[0] - return int(version.split(".")[-1]) + self.commits_since_tag + version = tag.split("-", maxsplit=1)[0] + return int(version.split(".")[-1]) + commits diff --git a/tests/ci/github_helper.py b/tests/ci/github_helper.py index eb0f6c24527..431e6977091 100644 --- a/tests/ci/github_helper.py +++ b/tests/ci/github_helper.py @@ -6,7 +6,7 @@ from datetime import date, datetime, timedelta from os import path as p from pathlib import Path from time import sleep -from typing import List, Optional, Tuple, Union +from typing import Any, Callable, List, Optional, Tuple, Union import github import requests @@ -49,38 +49,43 @@ class GitHub(github.Github): """Wrapper around search method with throttling and splitting by date. 
We split only by the first""" - splittable = False + splittable_arg = "" + splittable_value = [] for arg, value in kwargs.items(): if arg in ["closed", "created", "merged", "updated"]: if hasattr(value, "__iter__") and not isinstance(value, str): - assert [True for v in value if isinstance(v, (date, datetime))] + assert all(True for v in value if isinstance(v, (date, datetime))) assert len(value) == 2 kwargs[arg] = f"{value[0].isoformat()}..{value[1].isoformat()}" - if not splittable: + if not splittable_arg: # We split only by the first met splittable argument - preserved_arg = arg - preserved_value = value middle_value = value[0] + (value[1] - value[0]) / 2 - splittable = middle_value not in value + if middle_value in value: + # When the middle value in itareble value, we can't use it + # to split by dates later + continue + splittable_arg = arg + splittable_value = value continue assert isinstance(value, (date, datetime, str)) inter_result = [] # type: Issues + exception = RateLimitExceededException(0) for i in range(self.retries): try: logger.debug("Search issues, args=%s, kwargs=%s", args, kwargs) result = super().search_issues(*args, **kwargs) - if result.totalCount == 1000 and splittable: + if result.totalCount == 1000 and splittable_arg: # The hard limit is 1000. If it's splittable, then we make # two subrequests requests with less time frames logger.debug( "The search result contain exactly 1000 results, " "splitting %s=%s by middle point %s", - preserved_arg, - kwargs[preserved_arg], + splittable_arg, + kwargs[splittable_arg], middle_value, ) - kwargs[preserved_arg] = [preserved_value[0], middle_value] + kwargs[splittable_arg] = [splittable_value[0], middle_value] inter_result.extend(self.search_issues(*args, **kwargs)) if isinstance(middle_value, date): # When middle_value is a date, 2022-01-01..2022-01-03 @@ -88,9 +93,10 @@ class GitHub(github.Github): # 2022-01-02..2022-01-03, so we have results for # 2022-01-02 twicely. 
We split it to # 2022-01-01..2022-01-02 and 2022-01-03..2022-01-03. - # 2022-01-01..2022-01-02 aren't split, see splittable + # 2022-01-01..2022-01-02 aren't split, see splittable_arg + # definition above for kwargs.items middle_value += timedelta(days=1) - kwargs[preserved_arg] = [middle_value, preserved_value[1]] + kwargs[splittable_arg] = [middle_value, splittable_value[1]] inter_result.extend(self.search_issues(*args, **kwargs)) return inter_result @@ -104,12 +110,15 @@ class GitHub(github.Github): raise exception # pylint: enable=signature-differs - def get_pulls_from_search(self, *args, **kwargs) -> PullRequests: # type: ignore + def get_pulls_from_search(self, *args: Any, **kwargs: Any) -> PullRequests: """The search api returns actually issues, so we need to fetch PullRequests""" + progress_func = kwargs.pop( + "progress_func", lambda x: x + ) # type: Callable[[Issues], Issues] issues = self.search_issues(*args, **kwargs) repos = {} prs = [] # type: PullRequests - for issue in issues: + for issue in progress_func(issues): # See https://github.com/PyGithub/PyGithub/issues/2202, # obj._rawData doesn't spend additional API requests # pylint: disable=protected-access diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 4abaeac30b7..87f721cfde7 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -18,7 +18,7 @@ from collections import defaultdict from itertools import chain from typing import Any, Dict -from env_helper import CI +from env_helper import IS_CI from integration_test_images import IMAGES MAX_RETRY = 1 @@ -1004,7 +1004,7 @@ def run(): logging.info("Running tests") - if CI: + if IS_CI: # Avoid overlaps with previous runs logging.info("Clearing dmesg before run") subprocess.check_call("sudo -E dmesg --clear", shell=True) @@ -1012,7 +1012,7 @@ def run(): state, description, test_results, _ = runner.run_impl(repo_path, build_path) logging.info("Tests finished") - if CI: 
+ if IS_CI: # Dump dmesg (to capture possible OOMs) logging.info("Dumping dmesg") subprocess.check_call("sudo -E dmesg -T", shell=True) diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index 1e61fd9fab7..f91a3f080c0 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -13,7 +13,6 @@ import requests from build_download_helper import ( download_build_with_progress, - get_build_name_for_check, read_build_urls, ) from compress_files import compress_fast @@ -25,6 +24,7 @@ from report import FAILURE, SUCCESS, JobReport, TestResult, TestResults from ssh import SSHKey from stopwatch import Stopwatch from tee_popen import TeePopen +from ci_config import CI JEPSEN_GROUP_NAME = "jepsen_group" @@ -224,7 +224,7 @@ def main(): head = requests.head(build_url, timeout=60) assert head.status_code == 200, f"Clickhouse binary not found: {build_url}" else: - build_name = get_build_name_for_check(check_name) + build_name = CI.get_required_build_name(check_name) urls = read_build_urls(build_name, REPORT_PATH) build_url = None for url in urls: diff --git a/tests/ci/lambda_shared_package/lambda_shared/token.py b/tests/ci/lambda_shared_package/lambda_shared/token.py index f9860f6ad2a..9749122bd39 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/token.py +++ b/tests/ci/lambda_shared_package/lambda_shared/token.py @@ -63,7 +63,10 @@ def get_access_token_by_key_app(private_key: str, app_id: int) -> str: "iss": app_id, } - encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") + # FIXME: apparently should be switched to this so that mypy is happy + # jwt_instance = JWT() + # encoded_jwt = jwt_instance.encode(payload, private_key, algorithm="RS256") + encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") # type: ignore installation_id = get_installation_id(encoded_jwt) return get_access_token_by_jwt(encoded_jwt, installation_id) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index e1c7bf94ff5..37c08fc4efe 100644 --- 
a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -4,6 +4,7 @@ import argparse import logging +import sys from datetime import datetime from os import getenv from pprint import pformat @@ -17,11 +18,14 @@ from commit_status_helper import ( get_commit_filtered_statuses, get_commit, trigger_mergeable_check, + update_upstream_sync_status, ) from get_robot_token import get_best_robot_token from github_helper import GitHub, NamedUser, PullRequest, Repository from pr_info import PRInfo -from report import SUCCESS +from report import SUCCESS, FAILURE +from env_helper import GITHUB_UPSTREAM_REPOSITORY, GITHUB_REPOSITORY +from synchronizer_utils import SYNC_BRANCH_PREFIX # The team name for accepted approvals TEAM_NAME = getenv("GITHUB_TEAM_NAME", "core") @@ -243,17 +247,29 @@ def main(): repo = gh.get_repo(args.repo) if args.set_ci_status: - assert args.wf_status in ("failure", "success") + assert args.wf_status in (FAILURE, SUCCESS) # set mergeable check status and exit commit = get_commit(gh, args.pr_info.sha) statuses = get_commit_filtered_statuses(commit) - trigger_mergeable_check( + state = trigger_mergeable_check( commit, statuses, - set_if_green=True, workflow_failed=(args.wf_status != "success"), ) - return + + # Process upstream StatusNames.SYNC + pr_info = PRInfo() + if ( + pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") + and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY + ): + print("Updating upstream statuses") + update_upstream_sync_status(pr_info, state) + + if args.wf_status != "success": + # exit with 1 to rerun on workflow failed job restart + sys.exit(1) + sys.exit(0) # An ugly and not nice fix to patch the wrong organization URL, # see https://github.com/PyGithub/PyGithub/issues/2395#issuecomment-1378629710 diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 0c779b515bd..b3488ac0af2 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ 
-12,7 +12,7 @@ from pathlib import Path from github import Github from build_download_helper import download_builds_filter -from ci_config import CI_CONFIG +from ci_config import CI from clickhouse_helper import get_instance_id, get_instance_type from commit_status_helper import get_commit from docker_images_helper import get_docker_image, pull_image @@ -83,7 +83,7 @@ def main(): assert ( check_name ), "Check name must be provided as an input arg or in CHECK_NAME env" - required_build = CI_CONFIG.test_configs[check_name].required_build + required_build = CI.JOB_CONFIGS[check_name].get_required_build() with open(GITHUB_EVENT_PATH, "r", encoding="utf-8") as event_file: event = json.load(event_file) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index ccf5dc23121..a411fc4e8f6 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -13,8 +13,11 @@ from env_helper import ( GITHUB_REPOSITORY, GITHUB_RUN_URL, GITHUB_SERVER_URL, + GITHUB_UPSTREAM_REPOSITORY, ) from lambda_shared_package.lambda_shared.pr import Labels +from get_robot_token import get_best_robot_token +from github_helper import GitHub NeedsDataType = Dict[str, Dict[str, Union[str, Dict[str, str]]]] @@ -59,7 +62,7 @@ def get_pr_for_commit(sha, ref): data = response.json() our_prs = [] # type: List[Dict] if len(data) > 1: - print("Got more than one pr for commit", sha) + logging.warning("Got more than one pr for commit %s", sha) for pr in data: # We need to check if the PR is created in our repo, because # https://github.com/kaynewu/ClickHouse/pull/2 @@ -71,13 +74,20 @@ def get_pr_for_commit(sha, ref): if pr["head"]["ref"] in ref: return pr our_prs.append(pr) - print( - f"Cannot find PR with required ref {ref}, sha {sha} - returning first one" + logging.warning( + "Cannot find PR with required ref %s, sha %s - returning first one", + ref, + sha, ) first_pr = our_prs[0] return first_pr except Exception as ex: - print(f"Cannot fetch PR info from commit {ref}, {sha}", ex) + logging.error( + "Cannot 
fetch PR info from commit ref %s, sha %s, exception: %s", + ref, + sha, + ex, + ) return None @@ -259,12 +269,12 @@ class PRInfo: self.diff_urls.append( self.compare_url( pull_request["base"]["repo"]["default_branch"], - pull_request["head"]["label"], + pull_request["head"]["sha"], ) ) self.diff_urls.append( self.compare_url( - pull_request["head"]["label"], + pull_request["head"]["sha"], pull_request["base"]["repo"]["default_branch"], ) ) @@ -279,7 +289,7 @@ class PRInfo: # itself, but as well files changed since we branched out self.diff_urls.append( self.compare_url( - pull_request["head"]["label"], + pull_request["head"]["sha"], pull_request["base"]["repo"]["default_branch"], ) ) @@ -289,8 +299,10 @@ class PRInfo: else: # assume this is a dispatch self.event_type = EventType.DISPATCH - print("event.json does not match pull_request or push:") - print(json.dumps(github_event, sort_keys=True, indent=4)) + logging.warning( + "event.json does not match pull_request or push:\n%s", + json.dumps(github_event, sort_keys=True, indent=4), + ) self.sha = os.getenv( "GITHUB_SHA", "0000000000000000000000000000000000000000" ) @@ -307,7 +319,9 @@ class PRInfo: @property def is_master(self) -> bool: - return self.number == 0 and self.head_ref == "master" + return ( + self.number == 0 and self.head_ref == "master" and not self.is_merge_queue + ) @property def is_release(self) -> bool: @@ -315,7 +329,10 @@ class PRInfo: @property def is_pr(self): - return self.event_type == EventType.PULL_REQUEST + if self.event_type == EventType.PULL_REQUEST: + assert self.number + return True + return False @property def is_scheduled(self) -> bool: @@ -330,7 +347,7 @@ class PRInfo: return self.event_type == EventType.DISPATCH def compare_pr_url(self, pr_object: dict) -> str: - return self.compare_url(pr_object["base"]["label"], pr_object["head"]["label"]) + return self.compare_url(pr_object["base"]["sha"], pr_object["head"]["sha"]) @staticmethod def compare_url(first: str, second: str) -> str: 
@@ -344,9 +361,6 @@ class PRInfo: if self.changed_files_requested: return - if not getattr(self, "diff_urls", False): - raise TypeError("The event does not have diff URLs") - for diff_url in self.diff_urls: response = get_gh_api( diff_url, @@ -357,7 +371,7 @@ class PRInfo: diff_object = PatchSet(response.text) self.changed_files.update({f.path for f in diff_object}) self.changed_files_requested = True - print(f"Fetched info about {len(self.changed_files)} changed files") + logging.info("Fetched info about %s changed files", len(self.changed_files)) def get_dict(self): return { @@ -421,6 +435,34 @@ class PRInfo: return True return False + def get_latest_sync_commit(self): + gh = GitHub(get_best_robot_token(), per_page=100) + assert self.head_ref.startswith("sync-upstream/pr/") + assert self.repo_full_name != GITHUB_UPSTREAM_REPOSITORY + upstream_repo = gh.get_repo(GITHUB_UPSTREAM_REPOSITORY) + upstream_pr_number = int(self.head_ref.split("/pr/", maxsplit=1)[1]) + upstream_pr = upstream_repo.get_pull(upstream_pr_number) + sync_repo = gh.get_repo(GITHUB_REPOSITORY) + sync_pr = sync_repo.get_pull(self.number) + # Find the commit that is in both repos, upstream and cloud + sync_commits = sync_pr.get_commits().reversed + upstream_commits = upstream_pr.get_commits().reversed + # Github objects are compared by _url attribute. 
We can't compare them directly and + # should compare commits by SHA1 + upstream_shas = [c.sha for c in upstream_commits] + logging.info("Commits in upstream PR:\n %s", ", ".join(upstream_shas)) + sync_shas = [c.sha for c in sync_commits] + logging.info("Commits in sync PR:\n %s", ", ".join(reversed(sync_shas))) + + # find latest synced commit + last_synced_upstream_commit = None + for commit in upstream_commits: + if commit.sha in sync_shas: + last_synced_upstream_commit = commit + break + assert last_synced_upstream_commit + return last_synced_upstream_commit + class FakePRInfo: def __init__(self): diff --git a/tests/ci/release.py b/tests/ci/release.py index 2ae82177c67..7ab1149310f 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -94,6 +94,7 @@ class Release: self._version = get_version_from_repo(git=self._git) self.release_version = self.version self._release_branch = "" + self._version_new_tag = None # type: Optional[ClickHouseVersion] self._rollback_stack = [] # type: List[str] def run( @@ -180,7 +181,8 @@ class Release: ) raise - self.check_commit_release_ready() + if self.release_type == self.PATCH: + self.check_commit_release_ready() def do( self, check_dirty: bool, check_run_from_master: bool, check_branch: bool @@ -328,10 +330,16 @@ class Release: self.check_no_tags_after() # Create release branch self.read_version() - with self._create_branch(self.release_branch, self.release_commit): - with self._checkout(self.release_branch, True): - with self._bump_release_branch(): - yield + assert self._version_new_tag is not None + with self._create_tag( + self._version_new_tag.describe, + self.release_commit, + f"Initial commit for release {self._version_new_tag.major}.{self._version_new_tag.minor}", + ): + with self._create_branch(self.release_branch, self.release_commit): + with self._checkout(self.release_branch, True): + with self._bump_release_branch(): + yield @contextmanager def patch_release(self): @@ -444,6 +452,11 @@ class Release: 
self.version.with_description(VersionType.TESTING) self._update_cmake_contributors(self.version) self._commit_cmake_contributors(self.version) + # Create a version-new tag + self._version_new_tag = self.version.copy() + self._version_new_tag.tweak = 1 + self._version_new_tag.with_description(VersionType.NEW) + with self._push(helper_branch): body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md") # The following command is rolled back by deleting branch in self._push @@ -458,10 +471,10 @@ class Release: @contextmanager def _checkout(self, ref: str, with_checkout_back: bool = False) -> Iterator[None]: + self._git.update() orig_ref = self._git.branch or self._git.sha - need_rollback = False + rollback_cmd = "" if ref not in (self._git.branch, self._git.sha): - need_rollback = True self.run(f"git checkout {ref}") # checkout is not put into rollback_stack intentionally rollback_cmd = f"git checkout {orig_ref}" @@ -474,7 +487,7 @@ class Release: self.run(f"git reset --hard; git checkout -f {orig_ref}") raise # Normal flow when we need to checkout back - if with_checkout_back and need_rollback: + if with_checkout_back and rollback_cmd: self.run(rollback_cmd) @contextmanager @@ -510,9 +523,9 @@ class Release: @contextmanager def _create_gh_release(self, as_prerelease: bool) -> Iterator[None]: - with self._create_tag(): + tag = self.release_version.describe + with self._create_tag(tag, self.release_commit): # Preserve tag if version is changed - tag = self.release_version.describe prerelease = "" if as_prerelease: prerelease = "--prerelease" @@ -534,13 +547,13 @@ class Release: raise @contextmanager - def _create_tag(self): - tag = self.release_version.describe - self.run( - f"git tag -a -m 'Release {tag}' '{tag}' {self.release_commit}", - dry_run=self.dry_run, - ) - rollback_cmd = f"{self.dry_run_prefix}git tag -d '{tag}'" + def _create_tag( + self, tag: str, commit: str, tag_message: str = "" + ) -> Iterator[None]: + tag_message = tag_message or f"Release {tag}" + # 
Create tag even in dry-run + self.run(f"git tag -a -m '{tag_message}' '{tag}' {commit}") + rollback_cmd = f"git tag -d '{tag}'" self._rollback_stack.append(rollback_cmd) try: with self._push(tag): diff --git a/tests/ci/report.py b/tests/ci/report.py index ee58efdba52..bdaa2e15130 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -21,7 +21,7 @@ from typing import ( ) from build_download_helper import get_gh_api -from ci_config import CI_CONFIG, BuildConfig +from ci_config import CI from ci_utils import normalize_string from env_helper import REPORT_PATH, TEMP_PATH @@ -412,6 +412,7 @@ class BuildResult: ref_report = None master_report = None any_report = None + Path(REPORT_PATH).mkdir(parents=True, exist_ok=True) for file in Path(REPORT_PATH).iterdir(): if f"{build_name}.json" in file.name: any_report = file @@ -448,8 +449,10 @@ class BuildResult: return json.dumps(asdict(self), indent=2) @property - def build_config(self) -> Optional[BuildConfig]: - return CI_CONFIG.build_config.get(self.build_name, None) + def build_config(self) -> Optional[CI.BuildConfig]: + if self.build_name not in CI.JOB_CONFIGS: + return None + return CI.JOB_CONFIGS[self.build_name].build_config @property def comment(self) -> str: diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 9d9d1433073..00942352dde 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -5,7 +5,6 @@ from typing import Tuple from github import Github -from ci_config import StatusNames from commit_status_helper import ( create_ci_report, format_description, @@ -23,7 +22,8 @@ from lambda_shared_package.lambda_shared.pr import ( check_pr_description, ) from pr_info import PRInfo -from report import FAILURE, PENDING, SUCCESS +from report import FAILURE, PENDING, SUCCESS, StatusType +from ci_config import CI TRUSTED_ORG_IDS = { 54801242, # clickhouse @@ -58,7 +58,7 @@ def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): # Returns can_run, description def should_run_ci_for_pr(pr_info: 
PRInfo) -> Tuple[bool, str]: # Consider the labels and whether the user is trusted. - print("Got labels", pr_info.labels) + logging.info("Got labels: %s", pr_info.labels) if OK_SKIP_LABELS.intersection(pr_info.labels): return True, "Don't try new checks for release/backports/cherry-picks" @@ -66,9 +66,10 @@ def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str]: if Labels.CAN_BE_TESTED not in pr_info.labels and not pr_is_by_trusted_user( pr_info.user_login, pr_info.user_orgs ): - print( - f"PRs by untrusted users need the '{Labels.CAN_BE_TESTED}' label - " - "please contact a member of the core team" + logging.info( + "PRs by untrusted users need the '%s' label - " + "please contact a member of the core team", + Labels.CAN_BE_TESTED, ) return False, "Needs 'can be tested' label" @@ -93,6 +94,7 @@ def main(): description = format_description(description) gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) + status = SUCCESS # type: StatusType description_error, category = check_pr_description(pr_info.body, GITHUB_REPOSITORY) pr_labels_to_add = [] @@ -125,13 +127,16 @@ def main(): f"::notice :: Add backport labels [{backport_labels}] for a given PR category" ) - print(f"Change labels: add {pr_labels_to_add}, remove {pr_labels_to_remove}") + logging.info( + "Change labels: add %s, remove %s", pr_labels_to_add, pr_labels_to_remove + ) if pr_labels_to_add: post_labels(gh, pr_info, pr_labels_to_add) if pr_labels_to_remove: remove_labels(gh, pr_info, pr_labels_to_remove) + # 1. Next three IFs are in a correct order. 
First - fatal error if description_error: print( "::error ::Cannot run, PR description does not match the template: " @@ -146,9 +151,10 @@ def main(): f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/" "blob/master/.github/PULL_REQUEST_TEMPLATE.md?plain=1" ) + status = FAILURE post_commit_status( commit, - FAILURE, + status, url, format_description(description_error), PR_CHECK, @@ -156,41 +162,38 @@ def main(): ) sys.exit(1) + # 2. Then we check if the documentation is not created to fail the Mergeable check if ( Labels.PR_FEATURE in pr_info.labels and not pr_info.has_changes_in_documentation() ): print( - f"The '{Labels.PR_FEATURE}' in the labels, " + f"::error ::The '{Labels.PR_FEATURE}' in the labels, " "but there's no changed documentation" ) - post_commit_status( - commit, - FAILURE, - "", - f"expect adding docs for {Labels.PR_FEATURE}", - PR_CHECK, - pr_info, - ) - # allow the workflow to continue + status = FAILURE + description = f"expect adding docs for {Labels.PR_FEATURE}" + # 3. But we allow the workflow to continue + # 4. And post only a single commit status on a failure if not can_run: post_commit_status( commit, - FAILURE, + status, "", description, PR_CHECK, pr_info, ) - print("::notice ::Cannot run") + print("::error ::Cannot run") sys.exit(1) + # The status for continue can be posted only one time, not more. 
post_commit_status( commit, - SUCCESS, + status, "", - "ok", + description, PR_CHECK, pr_info, ) @@ -205,7 +208,7 @@ def main(): PENDING, ci_report_url, description, - StatusNames.CI, + CI.StatusNames.CI, pr_info, ) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index bff53f00ad3..86656e6e7c0 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -11,7 +11,7 @@ import boto3 # type: ignore import botocore # type: ignore from compress_files import compress_file_fast from env_helper import ( - CI, + IS_CI, RUNNER_TEMP, S3_BUILDS_BUCKET, S3_DOWNLOAD, @@ -104,20 +104,20 @@ class S3Helper: self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata) url = self.s3_url(bucket_name, s3_path) - logging.info("Upload %s to %s. Meta: %s", file_path, url, metadata) + logging.info("Upload %s to %s Meta: %s", file_path, url, metadata) return url def delete_file_from_s3(self, bucket_name: str, s3_path: str) -> None: self.client.delete_object(Bucket=bucket_name, Key=s3_path) def upload_test_report_to_s3(self, file_path: Path, s3_path: str) -> str: - if CI: + if IS_CI: return self._upload_file_to_s3(S3_TEST_REPORTS_BUCKET, file_path, s3_path) return S3Helper.copy_file_to_local(S3_TEST_REPORTS_BUCKET, file_path, s3_path) def upload_build_file_to_s3(self, file_path: Path, s3_path: str) -> str: - if CI: + if IS_CI: return self._upload_file_to_s3(S3_BUILDS_BUCKET, file_path, s3_path) return S3Helper.copy_file_to_local(S3_BUILDS_BUCKET, file_path, s3_path) @@ -255,7 +255,7 @@ class S3Helper: if full_fs_path.is_symlink(): if upload_symlinks: - if CI: + if IS_CI: return self._upload_file_to_s3( bucket_name, full_fs_path, @@ -266,7 +266,7 @@ class S3Helper: ) return [] - if CI: + if IS_CI: return self._upload_file_to_s3( bucket_name, full_fs_path, full_s3_path + "/" + file_path.name ) @@ -331,7 +331,7 @@ class S3Helper: return result def url_if_exists(self, key: str, bucket: str = S3_BUILDS_BUCKET) -> str: - if not CI: + if not IS_CI: local_path = 
self.local_path(bucket, key) if local_path.exists(): return local_path.as_uri() @@ -345,7 +345,7 @@ class S3Helper: @staticmethod def get_url(bucket: str, key: str) -> str: - if CI: + if IS_CI: return S3Helper.s3_url(bucket, key) return S3Helper.local_path(bucket, key).as_uri() diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index 9d33c480598..a68db8b9791 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -6,12 +6,13 @@ import subprocess import sys from pathlib import Path -from build_download_helper import get_build_name_for_check, read_build_urls +from build_download_helper import read_build_urls from docker_images_helper import DockerImage, get_docker_image, pull_image from env_helper import REPORT_PATH, TEMP_PATH from report import FAILURE, SUCCESS, JobReport, TestResult, TestResults from stopwatch import Stopwatch from tee_popen import TeePopen +from ci_config import CI IMAGE_NAME = "clickhouse/sqlancer-test" @@ -43,7 +44,7 @@ def main(): docker_image = pull_image(get_docker_image(IMAGE_NAME)) - build_name = get_build_name_for_check(check_name) + build_name = CI.get_required_build_name(check_name) urls = read_build_urls(build_name, reports_path) if not urls: raise ValueError("No build URLs found") diff --git a/tests/ci/sqltest.py b/tests/ci/sqltest.py index c8c2adbbd56..8e6ca6ff87f 100644 --- a/tests/ci/sqltest.py +++ b/tests/ci/sqltest.py @@ -6,12 +6,13 @@ import subprocess import sys from pathlib import Path -from build_download_helper import get_build_name_for_check, read_build_urls +from build_download_helper import read_build_urls from docker_images_helper import get_docker_image, pull_image from env_helper import REPORT_PATH, TEMP_PATH from pr_info import PRInfo from report import SUCCESS, JobReport, TestResult from stopwatch import Stopwatch +from ci_config import CI IMAGE_NAME = "clickhouse/sqltest" @@ -49,7 +50,7 @@ def main(): docker_image = pull_image(get_docker_image(IMAGE_NAME)) - build_name = 
get_build_name_for_check(check_name) + build_name = CI.get_required_build_name(check_name) print(build_name) urls = read_build_urls(build_name, reports_path) if not urls: diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9906d87a8c0..9deae06d9f4 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -13,7 +13,7 @@ from typing import List, Tuple, Union import magic from docker_images_helper import get_docker_image, pull_image -from env_helper import CI, REPO_COPY, TEMP_PATH +from env_helper import IS_CI, REPO_COPY, TEMP_PATH, GITHUB_EVENT_PATH from git_helper import GIT_PREFIX, git_runner from pr_info import PRInfo from report import ERROR, FAILURE, SUCCESS, JobReport, TestResults, read_test_results @@ -152,7 +152,7 @@ def main(): run_cpp_check = True run_shell_check = True run_python_check = True - if CI and pr_info.number > 0: + if IS_CI and pr_info.number > 0: pr_info.fetch_changed_files() run_cpp_check = any( not (is_python(file) or is_shell(file)) for file in pr_info.changed_files @@ -216,7 +216,8 @@ def main(): status=state, start_time=stopwatch.start_time_str, duration=stopwatch.duration_seconds, - additional_files=additional_files, + # add GITHUB_EVENT_PATH json file to have it in style check report. sometimes it's needed for debugging. 
+ additional_files=additional_files + [Path(GITHUB_EVENT_PATH)], ).dump() if state in [ERROR, FAILURE]: diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index f1073603e8d..8251ccbaf38 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -5,12 +5,12 @@ import argparse import sys -from ci_config import StatusNames from commit_status_helper import get_commit, post_commit_status from get_robot_token import get_best_robot_token from github_helper import GitHub from pr_info import PRInfo from report import SUCCESS +from ci_config import CI def parse_args() -> argparse.Namespace: @@ -75,7 +75,7 @@ def set_sync_status(gh, pr_info, sync_pr): if sync_pr.mergeable_state == "clean": print(f"Sync PR [{sync_pr.number}] is clean") post_commit_status( - get_commit(gh, pr_info.sha), SUCCESS, "", "", StatusNames.SYNC + get_commit(gh, pr_info.sha), SUCCESS, "", "", CI.StatusNames.SYNC ) else: print( diff --git a/tests/ci/test_ci_cache.py b/tests/ci/test_ci_cache.py index b1be0709803..81d649b246b 100644 --- a/tests/ci/test_ci_cache.py +++ b/tests/ci/test_ci_cache.py @@ -5,12 +5,12 @@ from pathlib import Path import shutil from typing import Dict, Set import unittest -from ci_config import Build, JobNames from s3_helper import S3Helper from ci_cache import CiCache from digest_helper import JOB_DIGEST_LEN from commit_status_helper import CommitStatusData from env_helper import S3_BUILDS_BUCKET, TEMP_PATH +from ci_config import CI def _create_mock_digest_1(string): @@ -21,8 +21,8 @@ def _create_mock_digest_2(string): return md5((string + "+nonce").encode("utf-8")).hexdigest()[:JOB_DIGEST_LEN] -DIGESTS = {job: _create_mock_digest_1(job) for job in JobNames} -DIGESTS2 = {job: _create_mock_digest_2(job) for job in JobNames} +DIGESTS = {job: _create_mock_digest_1(job) for job in CI.JobNames} +DIGESTS2 = {job: _create_mock_digest_2(job) for job in CI.JobNames} # pylint:disable=protected-access @@ -84,8 +84,10 @@ class TestCiCache(unittest.TestCase): NUM_BATCHES = 10 
DOCS_JOBS_NUM = 1 - assert len(set(job for job in JobNames)) == len(list(job for job in JobNames)) - NONDOCS_JOBS_NUM = len(set(job for job in JobNames)) - DOCS_JOBS_NUM + assert len(set(job for job in CI.JobNames)) == len( + list(job for job in CI.JobNames) + ) + NONDOCS_JOBS_NUM = len(set(job for job in CI.JobNames)) - DOCS_JOBS_NUM PR_NUM = 123456 status = CommitStatusData( @@ -97,13 +99,13 @@ class TestCiCache(unittest.TestCase): ) ### add some pending statuses for two batches, non-release branch - for job in JobNames: + for job in CI.JobNames: ci_cache.push_pending(job, [0, 1, 2], NUM_BATCHES, release_branch=False) ci_cache_2.push_pending(job, [0, 1, 2], NUM_BATCHES, release_branch=False) ### add success status for 0 batch, non-release branch batch = 0 - for job in JobNames: + for job in CI.JobNames: ci_cache.push_successful( job, batch, NUM_BATCHES, status, release_branch=False ) @@ -113,21 +115,17 @@ class TestCiCache(unittest.TestCase): ### add failed status for 2 batch, non-release branch batch = 2 - for job in JobNames: + for job in CI.JobNames: ci_cache.push_failed(job, batch, NUM_BATCHES, status, release_branch=False) ci_cache_2.push_failed( job, batch, NUM_BATCHES, status, release_branch=False ) ### check all expected directories were created on s3 mock - expected_build_path_1 = f"{CiCache.JobType.SRCS.value}-{_create_mock_digest_1(Build.PACKAGE_RELEASE)}" - expected_docs_path_1 = ( - f"{CiCache.JobType.DOCS.value}-{_create_mock_digest_1(JobNames.DOCS_CHECK)}" - ) - expected_build_path_2 = f"{CiCache.JobType.SRCS.value}-{_create_mock_digest_2(Build.PACKAGE_RELEASE)}" - expected_docs_path_2 = ( - f"{CiCache.JobType.DOCS.value}-{_create_mock_digest_2(JobNames.DOCS_CHECK)}" - ) + expected_build_path_1 = f"{CiCache.JobType.SRCS.value}-{_create_mock_digest_1(CI.BuildNames.PACKAGE_RELEASE)}" + expected_docs_path_1 = f"{CiCache.JobType.DOCS.value}-{_create_mock_digest_1(CI.JobNames.DOCS_CHECK)}" + expected_build_path_2 = 
f"{CiCache.JobType.SRCS.value}-{_create_mock_digest_2(CI.BuildNames.PACKAGE_RELEASE)}" + expected_docs_path_2 = f"{CiCache.JobType.DOCS.value}-{_create_mock_digest_2(CI.JobNames.DOCS_CHECK)}" self.assertCountEqual( list(s3_mock.files_on_s3_paths.keys()), [ @@ -174,7 +172,7 @@ class TestCiCache(unittest.TestCase): ) ### check statuses for all jobs in cache - for job in JobNames: + for job in CI.JobNames: self.assertEqual( ci_cache.is_successful(job, 0, NUM_BATCHES, release_branch=False), True ) @@ -212,7 +210,7 @@ class TestCiCache(unittest.TestCase): assert status2 is None ### add some more pending statuses for two batches and for a release branch - for job in JobNames: + for job in CI.JobNames: ci_cache.push_pending( job, batches=[0, 1], num_batches=NUM_BATCHES, release_branch=True ) @@ -226,7 +224,7 @@ class TestCiCache(unittest.TestCase): sha="deadbeaf2", pr_num=PR_NUM, ) - for job in JobNames: + for job in CI.JobNames: ci_cache.push_successful(job, 0, NUM_BATCHES, status, release_branch=True) ### check number of cache files is as expected @@ -249,7 +247,7 @@ class TestCiCache(unittest.TestCase): ) ### check statuses - for job in JobNames: + for job in CI.JobNames: self.assertEqual(ci_cache.is_successful(job, 0, NUM_BATCHES, False), True) self.assertEqual(ci_cache.is_successful(job, 0, NUM_BATCHES, True), True) self.assertEqual(ci_cache.is_successful(job, 1, NUM_BATCHES, False), False) @@ -273,7 +271,7 @@ class TestCiCache(unittest.TestCase): ### create new cache object and verify the same checks ci_cache = CiCache(s3_mock, DIGESTS) - for job in JobNames: + for job in CI.JobNames: self.assertEqual(ci_cache.is_successful(job, 0, NUM_BATCHES, False), True) self.assertEqual(ci_cache.is_successful(job, 0, NUM_BATCHES, True), True) self.assertEqual(ci_cache.is_successful(job, 1, NUM_BATCHES, False), False) diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index badbc4c5dcf..47247b91858 100644 --- a/tests/ci/test_ci_config.py +++ 
b/tests/ci/test_ci_config.py @@ -1,30 +1,491 @@ #!/usr/bin/env python3 import unittest -from ci_config import CIStages, JobNames, CI_CONFIG, Runners +from ci_config import CI +import ci as CIPY +from ci_settings import CiSettings +from pr_info import PRInfo, EventType +from s3_helper import S3Helper +from ci_cache import CiCache +from ci_utils import normalize_string + + +_TEST_EVENT_JSON = {"dummy": "dummy"} + +# pylint:disable=protected-access,union-attr class TestCIConfig(unittest.TestCase): def test_runner_config(self): """check runner is provided w/o exception""" - for job in JobNames: - runner = CI_CONFIG.get_runner_type(job) - self.assertIn(runner, Runners) + for job in CI.JobNames: + self.assertIn(CI.JOB_CONFIGS[job].runner_type, CI.Runners) + if ( + job + in ( + CI.JobNames.STYLE_CHECK, + CI.JobNames.BUILD_CHECK, + ) + or "jepsen" in job.lower() + ): + self.assertTrue( + "style" in CI.JOB_CONFIGS[job].runner_type, + f"Job [{job}] must have style-checker(-aarch64) runner", + ) + elif "binary_" in job.lower() or "package_" in job.lower(): + self.assertTrue( + CI.JOB_CONFIGS[job].runner_type == CI.Runners.BUILDER, + f"Job [{job}] must have [{CI.Runners.BUILDER}] runner", + ) + elif "aarch64" in job.lower(): + self.assertTrue( + "aarch" in CI.JOB_CONFIGS[job].runner_type, + f"Job [{job}] does not match runner [{CI.JOB_CONFIGS[job].runner_type}]", + ) + else: + self.assertTrue( + "aarch" not in CI.JOB_CONFIGS[job].runner_type, + f"Job [{job}] does not match runner [{CI.JOB_CONFIGS[job].runner_type}]", + ) + + def test_common_configs_applied_properly(self): + for job in CI.JobNames: + if CI.JOB_CONFIGS[job].job_name_keyword: + self.assertTrue( + CI.JOB_CONFIGS[job].job_name_keyword.lower() + in normalize_string(job), + f"Job [{job}] apparently uses wrong common config with job keyword [{CI.JOB_CONFIGS[job].job_name_keyword}]", + ) + + def test_required_checks(self): + for job in CI.REQUIRED_CHECKS: + if job in (CI.StatusNames.PR_CHECK, CI.StatusNames.SYNC): + 
continue + self.assertTrue(job in CI.JOB_CONFIGS, f"Job [{job}] not in job config") + + def test_builds_configs(self): + """build name in the build config must match the job name""" + for job in CI.JobNames: + self.assertTrue(job in CI.JOB_CONFIGS) + self.assertTrue(CI.JOB_CONFIGS[job].runner_type in CI.Runners) + if job in CI.BuildNames: + self.assertTrue(CI.JOB_CONFIGS[job].build_config.name == job) + self.assertTrue(CI.JOB_CONFIGS[job].required_builds is None) + else: + self.assertTrue(CI.JOB_CONFIGS[job].build_config is None) + if "asan" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_ASAN, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "msan" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_MSAN, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "tsan" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_TSAN, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "ubsan" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_UBSAN, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "debug" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_DEBUG, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "release" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + in ( + CI.BuildNames.PACKAGE_RELEASE, + CI.BuildNames.BINARY_RELEASE, + ), + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "coverage" in job: + self.assertTrue( + 
CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_RELEASE_COVERAGE, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "aarch" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_AARCH64, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "amd64" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_RELEASE, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "uzzer" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] == CI.BuildNames.FUZZERS, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "Docker" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + in ( + CI.BuildNames.PACKAGE_RELEASE, + CI.BuildNames.PACKAGE_AARCH64, + ), + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "SQLTest" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + == CI.BuildNames.PACKAGE_RELEASE, + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif "Jepsen" in job: + self.assertTrue( + CI.JOB_CONFIGS[job].required_builds[0] + in ( + CI.BuildNames.PACKAGE_RELEASE, + CI.BuildNames.BINARY_RELEASE, + ), + f"Job [{job}] probably has wrong required build [{CI.JOB_CONFIGS[job].required_builds[0]}] in JobConfig", + ) + elif job in ( + CI.JobNames.STYLE_CHECK, + CI.JobNames.FAST_TEST, + CI.JobNames.BUILD_CHECK, + CI.JobNames.DOCS_CHECK, + CI.JobNames.BUGFIX_VALIDATE, + ): + self.assertTrue(CI.JOB_CONFIGS[job].required_builds is None) + else: + print(f"Job [{job}] required build not checked") def test_job_stage_config(self): - """check runner is 
provided w/o exception""" - for job in JobNames: - stage = CI_CONFIG.get_job_ci_stage(job) - if job in [ - JobNames.STYLE_CHECK, - JobNames.FAST_TEST, - JobNames.JEPSEN_KEEPER, - JobNames.BUILD_CHECK, - JobNames.BUILD_CHECK_SPECIAL, - ]: - assert ( - stage == CIStages.NA - ), "These jobs are not in CI stages, must be NA" + """ + check runner is provided w/o exception + """ + # check stages + for job in CI.JobNames: + if job in CI.BuildNames: + self.assertTrue( + CI.get_job_ci_stage(job) + in (CI.WorkflowStages.BUILDS_1, CI.WorkflowStages.BUILDS_2) + ) else: - assert stage != CIStages.NA, f"stage not found for [{job}]" - self.assertIn(stage, CIStages) + if job in ( + CI.JobNames.STYLE_CHECK, + CI.JobNames.FAST_TEST, + CI.JobNames.JEPSEN_SERVER, + CI.JobNames.JEPSEN_KEEPER, + CI.JobNames.BUILD_CHECK, + ): + self.assertEqual( + CI.get_job_ci_stage(job), + CI.WorkflowStages.NA, + msg=f"Stage for [{job}] is not correct", + ) + else: + self.assertTrue( + CI.get_job_ci_stage(job) + in (CI.WorkflowStages.TESTS_1, CI.WorkflowStages.TESTS_3), + msg=f"Stage for [{job}] is not correct", + ) + + def test_job_stage_config_non_blocking(self): + """ + check runner is provided w/o exception + """ + # check stages + for job in CI.JobNames: + if job in CI.BuildNames: + self.assertTrue( + CI.get_job_ci_stage(job) + in (CI.WorkflowStages.BUILDS_1, CI.WorkflowStages.BUILDS_2) + ) + else: + if job in ( + CI.JobNames.STYLE_CHECK, + CI.JobNames.FAST_TEST, + CI.JobNames.JEPSEN_SERVER, + CI.JobNames.JEPSEN_KEEPER, + CI.JobNames.BUILD_CHECK, + ): + self.assertEqual( + CI.get_job_ci_stage(job), + CI.WorkflowStages.NA, + msg=f"Stage for [{job}] is not correct", + ) + else: + self.assertTrue( + CI.get_job_ci_stage(job, non_blocking_ci=True) + in (CI.WorkflowStages.TESTS_1, CI.WorkflowStages.TESTS_2), + msg=f"Stage for [{job}] is not correct", + ) + + def test_build_jobs_configs(self): + """ + check build jobs have non-None build_config attribute + check test jobs have None build_config attribute 
+ """ + for job in CI.JobNames: + if job in CI.BuildNames: + self.assertTrue( + isinstance(CI.JOB_CONFIGS[job].build_config, CI.BuildConfig) + ) + else: + self.assertTrue(CI.JOB_CONFIGS[job].build_config is None) + + def test_ci_py_for_pull_request(self): + """ + checks ci.py job configuration + """ + settings = CiSettings() + settings.no_ci_cache = True + settings.ci_sets = [CI.Tags.CI_SET_BUILDS] + settings.include_keywords = [ + "package", + "integration", + "upgrade", + "clickHouse_build_check", + "stateless", + ] + settings.exclude_keywords = ["asan", "aarch64"] + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + # make it pull request info + pr_info.event_type = EventType.PULL_REQUEST + pr_info.number = 12345 + assert pr_info.is_pr and not pr_info.is_release and not pr_info.is_master + assert not pr_info.is_merge_queue + ci_cache = CIPY._configure_jobs( + S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True + ) + actual_jobs_to_do = list(ci_cache.jobs_to_do) + expected_jobs_to_do = [] + for set_ in settings.ci_sets: + tag_config = CI.get_tag_config(set_) + assert tag_config + set_jobs = tag_config.run_jobs + for job in set_jobs: + if any(k in normalize_string(job) for k in settings.exclude_keywords): + continue + expected_jobs_to_do.append(job) + for job, config in CI.JOB_CONFIGS.items(): + if not any( + keyword in normalize_string(job) + for keyword in settings.include_keywords + ): + continue + if any( + keyword in normalize_string(job) + for keyword in settings.exclude_keywords + ): + continue + if config.random_bucket: + continue + if job not in expected_jobs_to_do: + expected_jobs_to_do.append(job) + + random_buckets = [] + for job, config in ci_cache.jobs_to_do.items(): + if config.random_bucket: + self.assertTrue( + config.random_bucket not in random_buckets, + "Only one job must be picked up from each random bucket", + ) + random_buckets.append(config.random_bucket) + actual_jobs_to_do.remove(job) + + self.assertCountEqual(expected_jobs_to_do, 
actual_jobs_to_do) + + def test_ci_py_for_pull_request_no_settings(self): + """ + checks ci.py job configuration in PR with empty ci_settings + """ + settings = CiSettings() + settings.no_ci_cache = True + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + # make it pull request info + pr_info.event_type = EventType.PULL_REQUEST + pr_info.number = 12345 + assert pr_info.is_pr and not pr_info.is_release and not pr_info.is_master + assert not pr_info.is_merge_queue + ci_cache = CIPY._configure_jobs( + S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True + ) + actual_jobs_to_do = list(ci_cache.jobs_to_do) + expected_jobs_to_do = [] + for job, config in CI.JOB_CONFIGS.items(): + if config.random_bucket: + continue + if config.release_only: + continue + if config.run_by_label: + continue + expected_jobs_to_do.append(job) + + random_buckets = [] + for job, config in ci_cache.jobs_to_do.items(): + if config.random_bucket: + self.assertTrue( + config.random_bucket not in random_buckets, + "Only one job must be picked up from each random bucket", + ) + random_buckets.append(config.random_bucket) + actual_jobs_to_do.remove(job) + + self.assertCountEqual(expected_jobs_to_do, actual_jobs_to_do) + + def test_ci_py_for_master(self): + """ + checks ci.py job configuration + """ + settings = CiSettings() + settings.no_ci_cache = True + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + pr_info.event_type = EventType.PUSH + assert pr_info.number == 0 and pr_info.is_release and not pr_info.is_merge_queue + ci_cache = CIPY._configure_jobs( + S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True + ) + actual_jobs_to_do = list(ci_cache.jobs_to_do) + expected_jobs_to_do = [] + for job, config in CI.JOB_CONFIGS.items(): + if config.pr_only: + continue + if config.run_by_label: + continue + if job in CI.MQ_JOBS: + continue + expected_jobs_to_do.append(job) + self.assertCountEqual(expected_jobs_to_do, actual_jobs_to_do) + + def test_ci_py_for_merge_queue(self): + """ + checks 
ci.py job configuration + """ + settings = CiSettings() + settings.no_ci_cache = True + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + # make it merge_queue + pr_info.event_type = EventType.MERGE_QUEUE + assert ( + pr_info.number == 0 + and pr_info.is_merge_queue + and not pr_info.is_release + and not pr_info.is_master + and not pr_info.is_pr + ) + ci_cache = CIPY._configure_jobs( + S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True + ) + actual_jobs_to_do = list(ci_cache.jobs_to_do) + expected_jobs_to_do = [ + "Style check", + "Fast test", + "binary_release", + "Unit tests (release)", + ] + self.assertCountEqual(expected_jobs_to_do, actual_jobs_to_do) + + def test_ci_py_await(self): + """ + checks ci.py job configuration + """ + settings = CiSettings() + settings.no_ci_cache = True + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + pr_info.event_type = EventType.PUSH + pr_info.number = 0 + assert pr_info.is_release and not pr_info.is_merge_queue + ci_cache = CIPY._configure_jobs( + S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True + ) + self.assertTrue(not ci_cache.jobs_to_skip, "Must be no jobs in skip list") + all_jobs_in_wf = list(ci_cache.jobs_to_do) + assert not ci_cache.jobs_to_wait + ci_cache.await_pending_jobs(is_release=pr_info.is_release, dry_run=True) + assert not ci_cache.jobs_to_skip + assert not ci_cache.jobs_to_wait + + # pretend there are pending jobs that we neet to wait + ci_cache.jobs_to_wait = dict(ci_cache.jobs_to_do) + for job, config in ci_cache.jobs_to_wait.items(): + assert not config.pending_batches + assert config.batches + config.pending_batches = list(config.batches) + for job, config in ci_cache.jobs_to_wait.items(): + for batch in range(config.num_batches): + record = CiCache.Record( + record_type=CiCache.RecordType.PENDING, + job_name=job, + job_digest=ci_cache.job_digests[job], + batch=batch, + num_batches=config.num_batches, + release_branch=True, + ) + for record_t_, records_ in ci_cache.records.items(): 
+ if record_t_.value == CiCache.RecordType.PENDING.value: + records_[record.to_str_key()] = record + + def _test_await_for_batch( + ci_cache: CiCache, record_type: CiCache.RecordType, batch: int + ) -> None: + assert ci_cache.jobs_to_wait + for job_, config_ in ci_cache.jobs_to_wait.items(): + record = CiCache.Record( + record_type=record_type, + job_name=job_, + job_digest=ci_cache.job_digests[job_], + batch=batch, + num_batches=config_.num_batches, + release_branch=True, + ) + for record_t_, records_ in ci_cache.records.items(): + if record_t_.value == record_type.value: + records_[record.to_str_key()] = record + # await + ci_cache.await_pending_jobs(is_release=pr_info.is_release, dry_run=True) + for _, config_ in ci_cache.jobs_to_wait.items(): + assert config_.pending_batches + if ( + record_type != CiCache.RecordType.PENDING + and batch < config_.num_batches + ): + assert batch not in config_.pending_batches + else: + assert batch in config_.pending_batches + + for _, config_ in ci_cache.jobs_to_do.items(): + # jobs to do must have batches to run before/after await + # if it's an empty list after await - apparently job has not been removed after await + assert config_.batches + + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 0) + # check all one-batch jobs are in jobs_to_skip + for job in all_jobs_in_wf: + config = CI.JOB_CONFIGS[job] + if config.num_batches == 1: + self.assertTrue(job in ci_cache.jobs_to_skip) + self.assertTrue(job not in ci_cache.jobs_to_do) + else: + self.assertTrue(job not in ci_cache.jobs_to_skip) + self.assertTrue(job in ci_cache.jobs_to_do) + + _test_await_for_batch(ci_cache, CiCache.RecordType.FAILED, 1) + _test_await_for_batch(ci_cache, CiCache.RecordType.SUCCESSFUL, 2) + + self.assertTrue(len(ci_cache.jobs_to_skip) > 0) + self.assertTrue(len(ci_cache.jobs_to_do) > 0) + self.assertCountEqual( + list(ci_cache.jobs_to_do) + ci_cache.jobs_to_skip, all_jobs_in_wf + ) diff --git a/tests/ci/test_ci_options.py 
b/tests/ci/test_ci_options.py index c00cd0b9216..3f158e79f30 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -4,7 +4,7 @@ import unittest from ci_settings import CiSettings -from ci_config import JobConfig +from ci_config import CI _TEST_BODY_1 = """ #### Run only: @@ -19,6 +19,7 @@ _TEST_BODY_1 = """ #### CI options: - [ ] do not test (only style check) +- [x] Woolen Wolfdog CI - [x] disable merge-commit (no merge from master before tests) - [ ] disable CI cache (job reuse) @@ -64,8 +65,8 @@ _TEST_JOB_LIST = [ "fuzzers", "Docker server image", "Docker keeper image", - "Install packages (amd64)", - "Install packages (arm64)", + "Install packages (release)", + "Install packages (aarch64)", "Stateless tests (debug)", "Stateless tests (release)", "Stateless tests (coverage)", @@ -120,19 +121,18 @@ _TEST_JOB_LIST = [ "AST fuzzer (ubsan)", "ClickHouse Keeper Jepsen", "ClickHouse Server Jepsen", - "Performance Comparison", - "Performance Comparison Aarch64", + "Performance Comparison (release)", + "Performance Comparison (aarch64)", "Sqllogic test (release)", "SQLancer (release)", "SQLancer (debug)", "SQLTest", - "Compatibility check (amd64)", + "Compatibility check (release)", "Compatibility check (aarch64)", - "ClickBench (amd64)", + "ClickBench (release)", "ClickBench (aarch64)", "libFuzzer tests", - "ClickHouse build check", - "ClickHouse special build check", + "Builds", "Docs check", "Bugfix validation", ] @@ -148,6 +148,7 @@ class TestCIOptions(unittest.TestCase): self.assertFalse(ci_options.do_not_test) self.assertFalse(ci_options.no_ci_cache) self.assertTrue(ci_options.no_merge_commit) + self.assertTrue(ci_options.woolen_wolfdog) self.assertEqual(ci_options.ci_sets, ["ci_set_non_required"]) self.assertCountEqual(ci_options.include_keywords, ["foo", "foo_bar"]) self.assertCountEqual(ci_options.exclude_keywords, ["foo", "foo_bar"]) @@ -157,6 +158,7 @@ class TestCIOptions(unittest.TestCase): ci_options = 
CiSettings.create_from_pr_message( _TEST_BODY_2, update_from_api=False ) + self.assertFalse(ci_options.woolen_wolfdog) self.assertCountEqual( ci_options.include_keywords, ["integration", "foo_bar", "stateless", "azure"], @@ -166,7 +168,10 @@ class TestCIOptions(unittest.TestCase): ["tsan", "foobar", "aarch64", "analyzer", "s3_storage", "coverage"], ) - jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST} + jobs_configs = { + job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) + for job in _TEST_JOB_LIST + } jobs_configs[ "fuzzers" ].run_by_label = ( @@ -179,7 +184,11 @@ class TestCIOptions(unittest.TestCase): ) filtered_jobs = list( ci_options.apply( - jobs_configs, is_release=False, is_pr=True, labels=["TEST_LABEL"] + jobs_configs, + is_release=False, + is_pr=True, + is_mq=False, + labels=["TEST_LABEL"], ) ) self.assertCountEqual( @@ -206,13 +215,18 @@ class TestCIOptions(unittest.TestCase): ) def test_options_applied_2(self): - jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST_2} + jobs_configs = { + job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) + for job in _TEST_JOB_LIST_2 + } jobs_configs["Style check"].release_only = True jobs_configs["Fast test"].pr_only = True jobs_configs["fuzzers"].run_by_label = "TEST_LABEL" # no settings are set filtered_jobs = list( - CiSettings().apply(jobs_configs, is_release=False, is_pr=True, labels=[]) + CiSettings().apply( + jobs_configs, is_release=False, is_pr=False, is_mq=True, labels=[] + ) ) self.assertCountEqual( filtered_jobs, @@ -220,9 +234,21 @@ class TestCIOptions(unittest.TestCase): "Fast test", ], ) - filtered_jobs = list( - CiSettings().apply(jobs_configs, is_release=True, is_pr=False, labels=[]) + CiSettings().apply( + jobs_configs, is_release=False, is_pr=True, is_mq=False, labels=[] + ) + ) + self.assertCountEqual( + filtered_jobs, + [ + "Fast test", + ], + ) + filtered_jobs = list( + CiSettings().apply( + jobs_configs, is_release=True, is_pr=False, is_mq=False, labels=[] + ) ) 
self.assertCountEqual( filtered_jobs, @@ -234,13 +260,20 @@ class TestCIOptions(unittest.TestCase): def test_options_applied_3(self): ci_settings = CiSettings() ci_settings.include_keywords = ["Style"] - jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST_2} + jobs_configs = { + job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) + for job in _TEST_JOB_LIST_2 + } jobs_configs["Style check"].release_only = True jobs_configs["Fast test"].pr_only = True # no settings are set filtered_jobs = list( ci_settings.apply( - jobs_configs, is_release=False, is_pr=True, labels=["TEST_LABEL"] + jobs_configs, + is_release=False, + is_pr=True, + is_mq=False, + labels=["TEST_LABEL"], ) ) self.assertCountEqual( @@ -253,7 +286,11 @@ class TestCIOptions(unittest.TestCase): ci_settings.include_keywords = ["Fast"] filtered_jobs = list( ci_settings.apply( - jobs_configs, is_release=True, is_pr=False, labels=["TEST_LABEL"] + jobs_configs, + is_release=True, + is_pr=False, + is_mq=False, + labels=["TEST_LABEL"], ) ) self.assertCountEqual( @@ -270,14 +307,21 @@ class TestCIOptions(unittest.TestCase): ) self.assertCountEqual(ci_options.include_keywords, ["analyzer"]) self.assertIsNone(ci_options.exclude_keywords) - jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST} + jobs_configs = { + job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) + for job in _TEST_JOB_LIST + } jobs_configs[ "fuzzers" ].run_by_label = "TEST_LABEL" # check "fuzzers" does not appears in the result jobs_configs["Integration tests (asan)"].release_only = True filtered_jobs = list( ci_options.apply( - jobs_configs, is_release=False, is_pr=True, labels=["TEST_LABEL"] + jobs_configs, + is_release=False, + is_pr=True, + is_mq=False, + labels=["TEST_LABEL"], ) ) self.assertCountEqual( diff --git a/tests/ci/test_git.py b/tests/ci/test_git.py index 3aedd8a8dea..60cd95b6869 100644 --- a/tests/ci/test_git.py +++ b/tests/ci/test_git.py @@ -1,10 +1,11 @@ #!/usr/bin/env python -from unittest.mock import patch 
import os.path as p import unittest +from dataclasses import dataclass +from unittest.mock import patch -from git_helper import Git, Runner, CWD +from git_helper import CWD, Git, Runner, git_runner class TestRunner(unittest.TestCase): @@ -35,8 +36,10 @@ class TestRunner(unittest.TestCase): class TestGit(unittest.TestCase): def setUp(self): """we use dummy git object""" + # get the git_runner's cwd to set it properly before the Runner is patched + _ = git_runner.cwd run_patcher = patch("git_helper.Runner.run", return_value="") - self.run_mock = run_patcher.start() + run_mock = run_patcher.start() self.addCleanup(run_patcher.stop) update_patcher = patch("git_helper.Git.update") update_mock = update_patcher.start() @@ -44,15 +47,13 @@ class TestGit(unittest.TestCase): self.git = Git() update_mock.assert_called_once() self.git.run("test") - self.run_mock.assert_called_once() - self.git.new_branch = "NEW_BRANCH_NAME" - self.git.new_tag = "v21.12.333.22222-stable" + run_mock.assert_called_once() self.git.branch = "old_branch" self.git.sha = "" self.git.sha_short = "" self.git.latest_tag = "" - self.git.description = "" - self.git.commits_since_tag = 0 + self.git.commits_since_latest = 0 + self.git.commits_since_new = 0 def test_tags(self): self.git.new_tag = "v21.12.333.22222-stable" @@ -71,11 +72,30 @@ class TestGit(unittest.TestCase): setattr(self.git, tag_attr, tag) def test_tweak(self): - self.git.commits_since_tag = 0 - self.assertEqual(self.git.tweak, 1) - self.git.commits_since_tag = 2 - self.assertEqual(self.git.tweak, 2) - self.git.latest_tag = "v21.12.333.22222-testing" - self.assertEqual(self.git.tweak, 22224) - self.git.commits_since_tag = 0 - self.assertEqual(self.git.tweak, 22222) + # tweak for the latest tag + @dataclass + class TestCase: + tag: str + commits: int + tweak: int + + cases = ( + TestCase("", 0, 1), + TestCase("", 2, 2), + TestCase("v21.12.333.22222-stable", 0, 22222), + TestCase("v21.12.333.22222-stable", 2, 2), + 
TestCase("v21.12.333.22222-testing", 0, 22222), + TestCase("v21.12.333.22222-testing", 2, 22224), + ) + for tag, commits, tweak in ( + ("latest_tag", "commits_since_latest", "tweak"), + ("new_tag", "commits_since_new", "tweak_to_new"), + ): + for tc in cases: + setattr(self.git, tag, tc.tag) + setattr(self.git, commits, tc.commits) + self.assertEqual( + getattr(self.git, tweak), + tc.tweak, + f"Wrong tweak for tag {tc.tag} and commits {tc.commits} of {tag}", + ) diff --git a/tests/ci/test_version.py b/tests/ci/test_version.py index 978edcc093e..c4f12091ec0 100644 --- a/tests/ci/test_version.py +++ b/tests/ci/test_version.py @@ -2,8 +2,13 @@ import unittest from argparse import ArgumentTypeError +from dataclasses import dataclass +from pathlib import Path import version_helper as vh +from git_helper import Git + +CHV = vh.ClickHouseVersion class TestFunctions(unittest.TestCase): @@ -32,3 +37,55 @@ class TestFunctions(unittest.TestCase): for error_case in error_cases: with self.assertRaises(ArgumentTypeError): version = vh.version_arg(error_case[0]) + + def test_get_version_from_repo(self): + @dataclass + class TestCase: + latest_tag: str + commits_since_latest: int + new_tag: str + commits_since_new: int + expected: CHV + + cases = ( + TestCase( + "v24.6.1.1-new", + 15, + "v24.4.1.2088-stable", + 415, + CHV(24, 5, 1, 54487, None, 415), + ), + TestCase( + "v24.6.1.1-testing", + 15, + "v24.4.1.2088-stable", + 415, + CHV(24, 5, 1, 54487, None, 16), + ), + TestCase( + "v24.6.1.1-stable", + 15, + "v24.4.1.2088-stable", + 415, + CHV(24, 5, 1, 54487, None, 15), + ), + TestCase( + "v24.5.1.1-stable", + 15, + "v24.4.1.2088-stable", + 415, + CHV(24, 5, 1, 54487, None, 15), + ), + ) + git = Git(True) + for tc in cases: + git.latest_tag = tc.latest_tag + git.commits_since_latest = tc.commits_since_latest + git.new_tag = tc.new_tag + git.commits_since_new = tc.commits_since_new + self.assertEqual( + vh.get_version_from_repo( + Path("tests/ci/tests/autogenerated_versions.txt"), 
git + ), + tc.expected, + ) diff --git a/tests/ci/tests/autogenerated_versions.txt b/tests/ci/tests/autogenerated_versions.txt new file mode 100644 index 00000000000..10028bf50c8 --- /dev/null +++ b/tests/ci/tests/autogenerated_versions.txt @@ -0,0 +1,12 @@ +# This variables autochanged by tests/ci/version_helper.py: + +# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, +# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. +SET(VERSION_REVISION 54487) +SET(VERSION_MAJOR 24) +SET(VERSION_MINOR 5) +SET(VERSION_PATCH 1) +SET(VERSION_GITHASH 70a1d3a63d47f0be077d67b8deb907230fc7cfb0) +SET(VERSION_DESCRIBE v24.5.1.1-testing) +SET(VERSION_STRING 24.5.1.1) +# end of autochange diff --git a/tests/ci/tmp/.gitignore b/tests/ci/tmp/.gitignore new file mode 100644 index 00000000000..72e8ffc0db8 --- /dev/null +++ b/tests/ci/tmp/.gitignore @@ -0,0 +1 @@ +* diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 800bfcf52c3..50263f6ebb6 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import logging -import os.path as p from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser, ArgumentTypeError +from pathlib import Path from typing import Any, Dict, Iterable, List, Literal, Optional, Set, Tuple, Union from git_helper import TWEAK, Git, get_tags, git_runner, removeprefix @@ -22,7 +22,7 @@ VERSIONS = Dict[str, Union[int, str]] VERSIONS_TEMPLATE = """# This variables autochanged by tests/ci/version_helper.py: -# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, +# NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
SET(VERSION_REVISION {revision}) SET(VERSION_MAJOR {major}) @@ -47,7 +47,7 @@ class ClickHouseVersion: patch: Union[int, str], revision: Union[int, str], git: Optional[Git], - tweak: Optional[str] = None, + tweak: Optional[Union[int, str]] = None, ): self._major = int(major) self._minor = int(minor) @@ -95,7 +95,7 @@ class ClickHouseVersion: if self._git is not None: self._git.update() return ClickHouseVersion( - self.major, self.minor, self.patch, self.revision, self._git, "1" + self.major, self.minor, self.patch, self.revision, self._git, 1 ) @property @@ -114,6 +114,10 @@ class ClickHouseVersion: def tweak(self) -> int: return self._tweak + @tweak.setter + def tweak(self, tweak: int) -> None: + self._tweak = tweak + @property def revision(self) -> int: return self._revision @@ -172,7 +176,7 @@ class ClickHouseVersion: self.patch, self.revision, self._git, - str(self.tweak), + self.tweak, ) try: copy.with_description(self.description) @@ -190,7 +194,9 @@ class ClickHouseVersion: and self.tweak == other.tweak ) - def __lt__(self, other: "ClickHouseVersion") -> bool: + def __lt__(self, other: Any) -> bool: + if not isinstance(self, type(other)): + return NotImplemented for part in ("major", "minor", "patch", "tweak"): if getattr(self, part) < getattr(other, part): return True @@ -220,10 +226,11 @@ ClickHouseVersions = List[ClickHouseVersion] class VersionType: LTS = "lts" + NEW = "new" PRESTABLE = "prestable" STABLE = "stable" TESTING = "testing" - VALID = (TESTING, PRESTABLE, STABLE, LTS) + VALID = (NEW, TESTING, PRESTABLE, STABLE, LTS) def validate_version(version: str) -> None: @@ -234,43 +241,56 @@ def validate_version(version: str) -> None: int(part) -def get_abs_path(path: str) -> str: - return p.abspath(p.join(git_runner.cwd, path)) +def get_abs_path(path: Union[Path, str]) -> Path: + return (Path(git_runner.cwd) / path).absolute() -def read_versions(versions_path: str = FILE_WITH_VERSION_PATH) -> VERSIONS: +def read_versions(versions_path: Union[Path, str] 
= FILE_WITH_VERSION_PATH) -> VERSIONS: versions = {} - path_to_file = get_abs_path(versions_path) - with open(path_to_file, "r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if not line.startswith("SET("): - continue + for line in get_abs_path(versions_path).read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line.startswith("SET("): + continue - value = 0 # type: Union[int, str] - name, value = line[4:-1].split(maxsplit=1) - name = removeprefix(name, "VERSION_").lower() - try: - value = int(value) - except ValueError: - pass - versions[name] = value + value = 0 # type: Union[int, str] + name, value = line[4:-1].split(maxsplit=1) + name = removeprefix(name, "VERSION_").lower() + try: + value = int(value) + except ValueError: + pass + versions[name] = value return versions def get_version_from_repo( - versions_path: str = FILE_WITH_VERSION_PATH, + versions_path: Union[Path, str] = FILE_WITH_VERSION_PATH, git: Optional[Git] = None, ) -> ClickHouseVersion: + """Get a ClickHouseVersion from FILE_WITH_VERSION_PATH. When the `git` parameter is + present, a proper `tweak` version part is calculated for case if the latest tag has + a `new` type and greater than version in `FILE_WITH_VERSION_PATH`""" versions = read_versions(versions_path) - return ClickHouseVersion( + cmake_version = ClickHouseVersion( versions["major"], versions["minor"], versions["patch"], versions["revision"], git, ) + # Since 24.5 we have tags like v24.6.1.1-new, and we must check if the release + # branch already has it's own commit. It's necessary for a proper tweak version + if git is not None and git.latest_tag: + version_from_tag = get_version_from_tag(git.latest_tag) + if ( + version_from_tag.description == VersionType.NEW + and cmake_version < version_from_tag + ): + # We are in a new release branch without existing release. 
+ # We should change the tweak version to a `tweak_to_new` + cmake_version.tweak = git.tweak_to_new + return cmake_version def get_version_from_string( @@ -350,15 +370,15 @@ def get_supported_versions( def update_cmake_version( version: ClickHouseVersion, - versions_path: str = FILE_WITH_VERSION_PATH, + versions_path: Union[Path, str] = FILE_WITH_VERSION_PATH, ) -> None: - path_to_file = get_abs_path(versions_path) - with open(path_to_file, "w", encoding="utf-8") as f: - f.write(VERSIONS_TEMPLATE.format_map(version.as_dict())) + get_abs_path(versions_path).write_text( + VERSIONS_TEMPLATE.format_map(version.as_dict()), encoding="utf-8" + ) def update_contributors( - relative_contributors_path: str = GENERATED_CONTRIBUTORS, + relative_contributors_path: Union[Path, str] = GENERATED_CONTRIBUTORS, force: bool = False, raise_error: bool = False, ) -> None: @@ -378,13 +398,11 @@ def update_contributors( ) contributors = [f' "{c}",' for c in contributors] - executer = p.relpath(p.realpath(__file__), git_runner.cwd) + executer = Path(__file__).relative_to(git_runner.cwd) content = CONTRIBUTORS_TEMPLATE.format( executer=executer, contributors="\n".join(contributors) ) - contributors_path = get_abs_path(relative_contributors_path) - with open(contributors_path, "w", encoding="utf-8") as cfd: - cfd.write(content) + get_abs_path(relative_contributors_path).write_text(content, encoding="utf-8") def update_version_local(version, version_type="testing"): diff --git a/tests/ci/worker/prepare-ci-ami.sh b/tests/ci/worker/prepare-ci-ami.sh index 3e2f33c89d1..eb410ddcb00 100644 --- a/tests/ci/worker/prepare-ci-ami.sh +++ b/tests/ci/worker/prepare-ci-ami.sh @@ -9,7 +9,7 @@ set -xeuo pipefail echo "Running prepare script" export DEBIAN_FRONTEND=noninteractive -export RUNNER_VERSION=2.316.1 +export RUNNER_VERSION=2.317.0 export RUNNER_HOME=/home/ubuntu/actions-runner deb_arch() { @@ -54,7 +54,8 @@ apt-get install --yes --no-install-recommends \ python3-dev \ python3-pip \ 
qemu-user-static \ - unzip + unzip \ + gh # Install docker curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg @@ -101,7 +102,7 @@ sudo -u ubuntu docker buildx version sudo -u ubuntu docker buildx rm default-builder || : # if it's the second attempt sudo -u ubuntu docker buildx create --use --name default-builder -pip install boto3 pygithub requests urllib3 unidiff dohq-artifactory +pip install boto3 pygithub requests urllib3 unidiff dohq-artifactory jwt rm -rf $RUNNER_HOME # if it's the second attempt mkdir -p $RUNNER_HOME && cd $RUNNER_HOME @@ -212,9 +213,9 @@ chmod +x /usr/local/share/scripts/init-network.sh touch /var/tmp/clickhouse-ci-ami.success # END OF THE SCRIPT -# TOE description +# TOE (Task Orchestrator and Executor) description # name: CIInfrastructurePrepare -# description: instals the infrastructure for ClickHouse CI runners +# description: installs the infrastructure for ClickHouse CI runners # schemaVersion: 1.0 # # phases: diff --git a/tests/clickhouse-test b/tests/clickhouse-test index af203563d58..36870d59c3a 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -808,10 +808,10 @@ class SettingsRandomizer: "merge_tree_coarse_index_granularity": lambda: random.randint(2, 32), "optimize_distinct_in_order": lambda: random.randint(0, 1), "max_bytes_before_external_sort": threshold_generator( - 0.3, 0.5, 1, 10 * 1024 * 1024 * 1024 + 0.3, 0.5, 0, 10 * 1024 * 1024 * 1024 ), "max_bytes_before_external_group_by": threshold_generator( - 0.3, 0.5, 1, 10 * 1024 * 1024 * 1024 + 0.3, 0.5, 0, 10 * 1024 * 1024 * 1024 ), "max_bytes_before_remerge_sort": lambda: random.randint(1, 3000000000), "min_compress_block_size": lambda: random.randint(1, 1048576 * 3), @@ -850,6 +850,11 @@ class SettingsRandomizer: "merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability": lambda: round( random.random(), 2 ), + "prefer_external_sort_block_bytes": lambda: 
random.choice([0, 1, 100000000]), + "cross_join_min_rows_to_compress": lambda: random.choice([0, 1, 100000000]), + "cross_join_min_bytes_to_compress": lambda: random.choice([0, 1, 100000000]), + "min_external_table_block_size_bytes": lambda: random.choice([0, 1, 100000000]), + "max_parsing_threads": lambda: random.choice([0, 1, 10]), } @staticmethod diff --git a/tests/config/config.d/rocksdb.xml b/tests/config/config.d/rocksdb.xml new file mode 100644 index 00000000000..a3790a3dc1d --- /dev/null +++ b/tests/config/config.d/rocksdb.xml @@ -0,0 +1,7 @@ + + + + DEBUG_LEVEL + + + diff --git a/tests/config/install.sh b/tests/config/install.sh index 6536683b6c2..08ee11a7407 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -68,6 +68,7 @@ ln -sf $SRC_PATH/config.d/zero_copy_destructive_operations.xml $DEST_SERVER_PATH ln -sf $SRC_PATH/config.d/block_number.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/handlers.yaml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/serverwide_trace_collector.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/rocksdb.xml $DEST_SERVER_PATH/config.d/ # Not supported with fasttest. 
if [ "${DEST_SERVER_PATH}" = "/etc/clickhouse-server" ] @@ -182,13 +183,7 @@ elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then fi if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then - if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - echo "Azure configuration will not be added" - else - echo "Adding azure configuration" - ln -sf $SRC_PATH/config.d/azure_storage_conf.xml $DEST_SERVER_PATH/config.d/ - fi - + ln -sf $SRC_PATH/config.d/azure_storage_conf.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/storage_conf_02944.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/storage_conf_02963.xml $DEST_SERVER_PATH/config.d/ diff --git a/tests/instructions/easy_tasks_sorted_ru.md b/tests/instructions/easy_tasks_sorted_ru.md index bc95e6b1c37..fbd86ebf08f 100644 --- a/tests/instructions/easy_tasks_sorted_ru.md +++ b/tests/instructions/easy_tasks_sorted_ru.md @@ -78,7 +78,7 @@ Upd: сделали по-другому: теперь всё безопасно. ## LEFT ONLY JOIN -## Функции makeDate, makeDateTime. +## + Функции makeDate, makeDateTime. `makeDate(year, month, day)` `makeDateTime(year, month, day, hour, minute, second, [timezone])` @@ -187,13 +187,13 @@ https://clickhouse.com/docs/en/operations/table_engines/external_data/ Не работает, если открыть clickhouse-client в интерактивном режиме и делать несколько запросов. -## + Настройка для возможности получить частичный результат при cancel-е. +## Настройка для возможности получить частичный результат при cancel-е. Хотим по Ctrl+C получить те данные, которые успели обработаться. ## Раскрытие кортежей в функциях высшего порядка. -## Табличная функция loop. +## + Табличная функция loop. 
`SELECT * FROM loop(database, table)` diff --git a/tests/integration/compose/docker_compose_ldap.yml b/tests/integration/compose/docker_compose_ldap.yml index f199516f315..1f50b34735d 100644 --- a/tests/integration/compose/docker_compose_ldap.yml +++ b/tests/integration/compose/docker_compose_ldap.yml @@ -15,7 +15,10 @@ services: ports: - ${LDAP_EXTERNAL_PORT:-1389}:${LDAP_INTERNAL_PORT:-1389} healthcheck: - test: "ldapsearch -x -b dc=example,dc=org cn > /dev/null" + test: > + ldapsearch -x -H ldap://localhost:$$LDAP_PORT_NUMBER -D $$LDAP_ADMIN_DN -w $$LDAP_ADMIN_PASSWORD -b $$LDAP_ROOT + | grep -c -E "member: cn=j(ohn|ane)doe" + | grep 2 >> /dev/null interval: 10s retries: 10 timeout: 2s diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 41c162217d2..544b06cca1b 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2640,7 +2640,9 @@ class ClickHouseCluster: [ "bash", "-c", - f"/opt/bitnami/openldap/bin/ldapsearch -x -H ldap://{self.ldap_host}:{self.ldap_port} -D cn=admin,dc=example,dc=org -w clickhouse -b dc=example,dc=org", + f"/opt/bitnami/openldap/bin/ldapsearch -x -H ldap://{self.ldap_host}:{self.ldap_port} -D cn=admin,dc=example,dc=org -w clickhouse -b dc=example,dc=org" + f'| grep -c -E "member: cn=j(ohn|ane)doe"' + f"| grep 2 >> /dev/null", ], user="root", ) diff --git a/tests/integration/helpers/hdfs_api.py b/tests/integration/helpers/hdfs_api.py index 5739496cb50..4e4468fef77 100644 --- a/tests/integration/helpers/hdfs_api.py +++ b/tests/integration/helpers/hdfs_api.py @@ -110,10 +110,9 @@ class HDFSApi(object): logging.debug( "Stdout:\n{}\n".format(res.stdout.decode("utf-8")) ) - logging.debug("Env:\n{}\n".format(env)) raise Exception( "Command {} return non-zero code {}: {}".format( - args, res.returncode, res.stderr.decode("utf-8") + cmd, res.returncode, res.stderr.decode("utf-8") ) ) diff --git a/tests/integration/helpers/keeper_utils.py 
b/tests/integration/helpers/keeper_utils.py index 39fa0d0f074..be710db37d1 100644 --- a/tests/integration/helpers/keeper_utils.py +++ b/tests/integration/helpers/keeper_utils.py @@ -124,27 +124,27 @@ class KeeperClient(object): return data def cd(self, path: str, timeout: float = 60.0): - self.execute_query(f"cd {path}", timeout) + self.execute_query(f"cd '{path}'", timeout) def ls(self, path: str, timeout: float = 60.0) -> list[str]: - return self.execute_query(f"ls {path}", timeout).split(" ") + return self.execute_query(f"ls '{path}'", timeout).split(" ") def create(self, path: str, value: str, timeout: float = 60.0): - self.execute_query(f"create {path} {value}", timeout) + self.execute_query(f"create '{path}' '{value}'", timeout) def get(self, path: str, timeout: float = 60.0) -> str: - return self.execute_query(f"get {path}", timeout) + return self.execute_query(f"get '{path}'", timeout) def set(self, path: str, value: str, version: tp.Optional[int] = None) -> None: self.execute_query( - f"set {path} {value} {version if version is not None else ''}" + f"set '{path}' '{value}' {version if version is not None else ''}" ) def rm(self, path: str, version: tp.Optional[int] = None) -> None: - self.execute_query(f"rm {path} {version if version is not None else ''}") + self.execute_query(f"rm '{path}' {version if version is not None else ''}") def exists(self, path: str, timeout: float = 60.0) -> bool: - return bool(int(self.execute_query(f"exists {path}", timeout))) + return bool(int(self.execute_query(f"exists '{path}'", timeout))) def stop(self): if not self.stopped: @@ -152,22 +152,22 @@ class KeeperClient(object): self.proc.communicate(b"exit\n", timeout=10.0) def sync(self, path: str, timeout: float = 60.0): - self.execute_query(f"sync {path}", timeout) + self.execute_query(f"sync '{path}'", timeout) def touch(self, path: str, timeout: float = 60.0): - self.execute_query(f"touch {path}", timeout) + self.execute_query(f"touch '{path}'", timeout) def 
find_big_family(self, path: str, n: int = 10, timeout: float = 60.0) -> str: - return self.execute_query(f"find_big_family {path} {n}", timeout) + return self.execute_query(f"find_big_family '{path}' {n}", timeout) def find_super_nodes(self, threshold: int, timeout: float = 60.0) -> str: return self.execute_query(f"find_super_nodes {threshold}", timeout) def get_direct_children_number(self, path: str, timeout: float = 60.0) -> str: - return self.execute_query(f"get_direct_children_number {path}", timeout) + return self.execute_query(f"get_direct_children_number '{path}'", timeout) def get_all_children_number(self, path: str, timeout: float = 60.0) -> str: - return self.execute_query(f"get_all_children_number {path}", timeout) + return self.execute_query(f"get_all_children_number '{path}'", timeout) def delete_stale_backups(self, timeout: float = 60.0) -> str: return self.execute_query("delete_stale_backups", timeout) @@ -196,7 +196,7 @@ class KeeperClient(object): ) return self.execute_query( - f"reconfig {operation} {joining or leaving or new_members}", timeout + f"reconfig {operation} '{joining or leaving or new_members}'", timeout ) @classmethod diff --git a/tests/integration/helpers/s3_url_proxy_tests_util.py b/tests/integration/helpers/s3_url_proxy_tests_util.py index c67d00769c5..9a45855acb8 100644 --- a/tests/integration/helpers/s3_url_proxy_tests_util.py +++ b/tests/integration/helpers/s3_url_proxy_tests_util.py @@ -30,7 +30,7 @@ def check_proxy_logs( False ), f"{http_method} method not found in logs of {proxy_instance} for bucket {bucket}" - time.sleep(1) + time.sleep(1) def wait_resolver(cluster): @@ -124,3 +124,13 @@ def simple_storage_test(cluster, node, proxies, policy): # not checking for POST because it is in a different format check_proxy_logs(cluster, proxies, "http", policy, ["PUT", "GET"]) + + +def simple_test_assert_no_proxy(cluster, proxies, protocol, bucket): + minio_endpoint = build_s3_endpoint(protocol, bucket) + node = 
cluster.instances[bucket] + perform_simple_queries(node, minio_endpoint) + + # No HTTP method should be found in proxy logs if no proxy is active + empty_method_list = [] + check_proxy_logs(cluster, proxies, protocol, bucket, empty_method_list) diff --git a/tests/integration/helpers/test_tools.py b/tests/integration/helpers/test_tools.py index 2afbae340be..1c8c5c33a13 100644 --- a/tests/integration/helpers/test_tools.py +++ b/tests/integration/helpers/test_tools.py @@ -139,12 +139,18 @@ def assert_logs_contain_with_retry(instance, substring, retry_count=20, sleep_ti def exec_query_with_retry( - instance, query, retry_count=40, sleep_time=0.5, silent=False, settings={} + instance, + query, + retry_count=40, + sleep_time=0.5, + silent=False, + settings={}, + timeout=30, ): exception = None for cnt in range(retry_count): try: - res = instance.query(query, timeout=30, settings=settings) + res = instance.query(query, timeout=timeout, settings=settings) if not silent: logging.debug(f"Result of {query} on {cnt} try is {res}") break diff --git a/tests/integration/helpers/uclient.py b/tests/integration/helpers/uclient.py index 45c8b8f64e2..195eb52ffeb 100644 --- a/tests/integration/helpers/uclient.py +++ b/tests/integration/helpers/uclient.py @@ -8,7 +8,7 @@ sys.path.insert(0, os.path.join(CURDIR)) from . 
import uexpect -prompt = ":\) " +prompt = ":\\) " end_of_block = r".*\r\n.*\r\n" @@ -21,7 +21,7 @@ class client(object): self.client.eol("\r") self.client.logger(log, prefix=name) self.client.timeout(20) - self.client.expect("[#\$] ", timeout=2) + self.client.expect("[#\\$] ", timeout=2) self.client.send(command) def __enter__(self): diff --git a/tests/integration/test_MemoryTracking/configs/no_system_log.xml b/tests/integration/test_MemoryTracking/configs/no_system_log.xml index 3218dae4dc7..7d80c7fbf78 100644 --- a/tests/integration/test_MemoryTracking/configs/no_system_log.xml +++ b/tests/integration/test_MemoryTracking/configs/no_system_log.xml @@ -5,6 +5,7 @@ + diff --git a/tests/integration/test_access_control_on_cluster/test.py b/tests/integration/test_access_control_on_cluster/test.py index 8dbb87c67d8..b12add7ad3f 100644 --- a/tests/integration/test_access_control_on_cluster/test.py +++ b/tests/integration/test_access_control_on_cluster/test.py @@ -74,3 +74,18 @@ def test_grant_all_on_cluster(): assert ch2.query("SHOW GRANTS FOR Alex") == "GRANT ALL ON *.* TO Alex\n" ch1.query("DROP USER Alex ON CLUSTER 'cluster'") + + +def test_grant_current_database_on_cluster(): + ch1.query("CREATE DATABASE user_db ON CLUSTER 'cluster'") + ch1.query( + "CREATE USER IF NOT EXISTS test_user ON CLUSTER 'cluster' DEFAULT DATABASE user_db" + ) + ch1.query( + "GRANT SELECT ON user_db.* TO test_user ON CLUSTER 'cluster' WITH GRANT OPTION" + ) + ch1.query("GRANT CLUSTER ON *.* TO test_user ON CLUSTER 'cluster'") + + assert ch1.query("SHOW DATABASES", user="test_user") == "user_db\n" + ch1.query("GRANT SELECT ON * TO test_user ON CLUSTER 'cluster'", user="test_user") + assert ch1.query("SHOW DATABASES", user="test_user") == "user_db\n" diff --git a/tests/integration/test_manipulate_statistic/__init__.py b/tests/integration/test_asynchronous_metric_jemalloc_profile_active/__init__.py similarity index 100% rename from tests/integration/test_manipulate_statistic/__init__.py rename 
to tests/integration/test_asynchronous_metric_jemalloc_profile_active/__init__.py diff --git a/tests/integration/test_asynchronous_metric_jemalloc_profile_active/configs/asynchronous_metrics_update_period_s.xml b/tests/integration/test_asynchronous_metric_jemalloc_profile_active/configs/asynchronous_metrics_update_period_s.xml new file mode 100644 index 00000000000..47e88730482 --- /dev/null +++ b/tests/integration/test_asynchronous_metric_jemalloc_profile_active/configs/asynchronous_metrics_update_period_s.xml @@ -0,0 +1,3 @@ + + 1 + diff --git a/tests/integration/test_asynchronous_metric_jemalloc_profile_active/test.py b/tests/integration/test_asynchronous_metric_jemalloc_profile_active/test.py new file mode 100644 index 00000000000..b3769a61b3f --- /dev/null +++ b/tests/integration/test_asynchronous_metric_jemalloc_profile_active/test.py @@ -0,0 +1,72 @@ +import time + +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", + main_configs=["configs/asynchronous_metrics_update_period_s.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +# asynchronous metrics are updated once every 60s by default. To make the test run faster, the setting +# asynchronous_metric_update_period_s is being set to 1s so that the metrics are populated faster and +# are available for querying during the test. +def test_asynchronous_metric_jemalloc_profile_active(started_cluster): + # default open + if node1.is_built_with_sanitizer(): + pytest.skip("Disabled for sanitizers") + + res = node1.query( + "SELECT * FROM system.asynchronous_metrics WHERE metric ILIKE '%jemalloc.prof.active%' FORMAT Vertical;" + ) + assert ( + res + == """Row 1: +────── +metric: jemalloc.prof.active +value: 0 +description: An internal metric of the low-level memory allocator (jemalloc). 
See https://jemalloc.net/jemalloc.3.html +""" + ) + # enable + node1.query("SYSTEM JEMALLOC ENABLE PROFILE") + node1.query("SYSTEM RELOAD ASYNCHRONOUS METRICS") + res = node1.query( + "SELECT * FROM system.asynchronous_metrics WHERE metric ILIKE '%jemalloc.prof.active%' FORMAT Vertical;" + ) + assert ( + res + == """Row 1: +────── +metric: jemalloc.prof.active +value: 1 +description: An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html +""" + ) + # disable + node1.query("SYSTEM JEMALLOC DISABLE PROFILE") + node1.query("SYSTEM RELOAD ASYNCHRONOUS METRICS") + res = node1.query( + "SELECT * FROM system.asynchronous_metrics WHERE metric ILIKE '%jemalloc.prof.active%' FORMAT Vertical;" + ) + assert ( + res + == """Row 1: +────── +metric: jemalloc.prof.active +value: 0 +description: An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html +""" + ) diff --git a/tests/integration/test_attach_partition_using_copy/test.py b/tests/integration/test_attach_partition_using_copy/test.py index e7163b1eb32..d5b07603dff 100644 --- a/tests/integration/test_attach_partition_using_copy/test.py +++ b/tests/integration/test_attach_partition_using_copy/test.py @@ -98,7 +98,8 @@ def create_destination_table(node, table_name, replicated): ) -def test_both_mergtree(start_cluster): +def test_both_mergetree(start_cluster): + cleanup([replica1, replica2]) create_source_table(replica1, "source", False) create_destination_table(replica1, "destination", False) @@ -120,12 +121,13 @@ def test_both_mergtree(start_cluster): def test_all_replicated(start_cluster): + cleanup([replica1, replica2]) create_source_table(replica1, "source", True) create_destination_table(replica1, "destination", True) create_destination_table(replica2, "destination", True) - replica1.query("SYSTEM SYNC REPLICA destination") replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") + 
replica2.query("SYSTEM SYNC REPLICA destination") assert_eq_with_retry( replica1, @@ -154,12 +156,13 @@ def test_all_replicated(start_cluster): def test_only_destination_replicated(start_cluster): + cleanup([replica1, replica2]) create_source_table(replica1, "source", False) create_destination_table(replica1, "destination", True) create_destination_table(replica2, "destination", True) - replica1.query("SYSTEM SYNC REPLICA destination") replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") + replica2.query("SYSTEM SYNC REPLICA destination") assert_eq_with_retry( replica1, @@ -188,6 +191,7 @@ def test_only_destination_replicated(start_cluster): def test_not_work_on_different_disk(start_cluster): + cleanup([replica1, replica2]) # Replace and move should not work on replace create_source_table(replica1, "source", False) create_destination_table(replica2, "destination", False) diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index ef9e536976b..d8662fad011 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1,5 +1,4 @@ import pytest -import asyncio import glob import re import random @@ -168,6 +167,32 @@ def test_restore_table(engine): assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" +def test_restore_materialized_view_with_definer(): + instance.query("CREATE DATABASE test") + instance.query( + "CREATE TABLE test.test_table (s String) ENGINE = MergeTree ORDER BY s" + ) + instance.query("CREATE USER u1") + instance.query("GRANT SELECT ON *.* TO u1") + instance.query("GRANT INSERT ON *.* TO u1") + + instance.query( + """ + CREATE MATERIALIZED VIEW test.test_mv_1 (s String) + ENGINE = MergeTree ORDER BY s + DEFINER = u1 SQL SECURITY DEFINER + AS SELECT * FROM test.test_table + """ + ) + + backup_name = new_backup_name() + instance.query(f"BACKUP DATABASE test TO {backup_name}") + 
instance.query("DROP DATABASE test") + instance.query("DROP USER u1") + + instance.query(f"RESTORE DATABASE test FROM {backup_name}") + + @pytest.mark.parametrize( "engine", ["MergeTree", "Log", "TinyLog", "StripeLog", "Memory"] ) @@ -1460,6 +1485,7 @@ def test_backup_all(exclude_system_log_tables): "processors_profile_log", "asynchronous_insert_log", "backup_log", + "error_log", ] exclude_from_backup += ["system." + table_name for table_name in log_tables] @@ -1474,7 +1500,7 @@ def test_backup_all(exclude_system_log_tables): restore_settings = [] if not exclude_system_log_tables: restore_settings.append("allow_non_empty_tables=true") - restore_command = f"RESTORE ALL FROM {backup_name} {'SETTINGS '+ ', '.join(restore_settings) if restore_settings else ''}" + restore_command = f"RESTORE ALL FROM {backup_name} {'SETTINGS ' + ', '.join(restore_settings) if restore_settings else ''}" session_id = new_session_id() instance.http_query( diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index c9f20333654..cd0f2032559 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -161,13 +161,13 @@ def wait_for_fail_restore(node, restore_id): elif status == "RESTORING": assert_eq_with_retry( node, - f"SELECT status FROM system.backups WHERE id = '{backup_id}'", + f"SELECT status FROM system.backups WHERE id = '{restore_id}'", "RESTORE_FAILED", sleep_time=2, retry_count=50, ) error = node.query( - f"SELECT error FROM system.backups WHERE id == '{backup_id}'" + f"SELECT error FROM system.backups WHERE id == '{restore_id}'" ).rstrip("\n") assert re.search( "Cannot restore the table default.tbl because it already contains some data", diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py 
index 967ed6a221c..d53335000a6 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -627,67 +627,126 @@ def test_user_specific_auth(start_cluster): create_user("superuser2") create_user("regularuser") - node.query("CREATE TABLE specific_auth (col UInt64) ENGINE=Memory") + node.query("CREATE TABLE specific_auth (col UInt64) ENGINE=MergeTree ORDER BY col") + node.query("INSERT INTO specific_auth VALUES (1)") - assert "Access" in node.query_and_get_error( - "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')" + def backup_restore(backup, user, should_fail, on_cluster=False, base_backup=None): + on_cluster_clause = "ON CLUSTER 'cluster'" if on_cluster else "" + base_backup = ( + f" SETTINGS base_backup = {base_backup}" if base_backup is not None else "" + ) + backup_query = ( + f"BACKUP TABLE specific_auth {on_cluster_clause} TO {backup} {base_backup}" + ) + restore_query = f"RESTORE TABLE specific_auth {on_cluster_clause} FROM {backup}" + + if should_fail: + assert "Access" in node.query_and_get_error(backup_query, user=user) + else: + node.query(backup_query, user=user) + node.query("DROP TABLE specific_auth SYNC") + node.query(restore_query, user=user) + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup1/')", + user=None, + should_fail=True, ) - assert "Access" in node.query_and_get_error( - "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup1/')", user="regularuser", + should_fail=True, ) - node.query( - "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", - user="superuser1", - ) - node.query( - "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", + backup_restore( + 
"S3('http://minio1:9001/root/data/backups/limited/backup1/')", user="superuser1", + should_fail=False, ) - node.query( - "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup2.zip')", - user="superuser2", - ) - node.query( - "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup2.zip')", + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup2/')", user="superuser2", + should_fail=False, ) assert "Access" in node.query_and_get_error( - "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", + "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1/')", user="regularuser", ) - assert "HTTP response code: 403" in node.query_and_get_error( - "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')", + node.query("INSERT INTO specific_auth VALUES (2)") + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup1_inc/')", user="regularuser", + should_fail=True, + base_backup="S3('http://minio1:9001/root/data/backups/limited/backup1/')", ) - node.query( - "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')", + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup1_inc/')", user="superuser1", + should_fail=False, + base_backup="S3('http://minio1:9001/root/data/backups/limited/backup1/')", + ) + + assert "Access" in node.query_and_get_error( + "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1_inc/')", + user="regularuser", ) assert "Access Denied" in node.query_and_get_error( - "BACKUP TABLE specific_auth ON CLUSTER 'cluster' TO S3('http://minio1:9001/root/data/backups/limited/backup3/')", + "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1/*', 'RawBLOB')", user="regularuser", ) node.query( - "BACKUP TABLE specific_auth ON CLUSTER 'cluster' 
TO S3('http://minio1:9001/root/data/backups/limited/backup3/')", + "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1/*', 'RawBLOB')", user="superuser1", ) + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="regularuser", + should_fail=True, + on_cluster=True, + ) + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="superuser1", + should_fail=False, + on_cluster=True, + ) + assert "Access Denied" in node.query_and_get_error( "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3/')", user="regularuser", ) - node.query( - "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3/')", + node.query("INSERT INTO specific_auth VALUES (3)") + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup3_inc/')", + user="regularuser", + should_fail=True, + on_cluster=True, + base_backup="S3('http://minio1:9001/root/data/backups/limited/backup3/')", + ) + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup3_inc/')", user="superuser1", + should_fail=False, + on_cluster=True, + base_backup="S3('http://minio1:9001/root/data/backups/limited/backup3/')", + ) + + assert "Access Denied" in node.query_and_get_error( + "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3_inc/')", + user="regularuser", ) assert "Access Denied" in node.query_and_get_error( diff --git a/tests/integration/test_backward_compatibility/test_convert_ordinary.py b/tests/integration/test_backward_compatibility/test_convert_ordinary.py index b8db4e005a4..f5d0c066600 100644 --- a/tests/integration/test_backward_compatibility/test_convert_ordinary.py +++ b/tests/integration/test_backward_compatibility/test_convert_ordinary.py @@ -187,7 +187,7 @@ def check_convert_all_dbs_to_atomic(): # 6 tables, MVs 
contain 2 rows (inner tables does not match regexp) assert "8\t{}\n".format(8 * len("atomic")) == node.query( - "SELECT count(), sum(n) FROM atomic.merge".format(db) + "SELECT count(), sum(n) FROM atomic.merge" ) node.query("DETACH TABLE ordinary.detached PERMANENTLY") diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index 1cf5c3deb81..758dda655da 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -89,7 +89,7 @@ def test_aggregate_states(start_cluster): logging.info("Skipping %s", aggregate_function) skipped += 1 continue - logging.exception("Failed %s", function) + logging.exception("Failed %s", aggregate_function) failed += 1 continue diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 476f7c61b28..1ed70e20b79 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -533,11 +533,12 @@ def test_query_is_canceled_with_inf_retries(cluster, broken_s3): SELECT * FROM system.numbers - LIMIT 1000000 + LIMIT 1000000000 SETTINGS s3_max_single_part_upload_size=100, s3_min_upload_part_size=10000, - s3_check_objects_after_upload=0 + s3_check_objects_after_upload=0, + s3_max_inflight_parts_for_one_file=1000 """, query_id=insert_query_id, ) diff --git a/tests/integration/test_composable_protocols/configs/config.xml b/tests/integration/test_composable_protocols/configs/config.xml index 09a512eb5a4..7d39363bc62 100644 --- a/tests/integration/test_composable_protocols/configs/config.xml +++ b/tests/integration/test_composable_protocols/configs/config.xml @@ -58,6 +58,26 @@ 8444 https protocol endpoint + + tls + http + 0.0.0.0 + 8445 + https protocol with TLSv1_2 minimum version + sslv2,sslv3,tlsv1,tlsv1_1 + 
/etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + + + tls + http + 0.0.0.0 + 8446 + https protocol with TLSv1_3 minimum version + sslv2,sslv3,tlsv1,tlsv1_1,tlsv1_2 + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py index aa5a1e766e6..241d1505433 100644 --- a/tests/integration/test_composable_protocols/test.py +++ b/tests/integration/test_composable_protocols/test.py @@ -7,6 +7,7 @@ from helpers.client import Client import urllib.request, urllib.parse import subprocess import socket +import warnings SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -27,18 +28,34 @@ def setup_nodes(): cluster.shutdown() -def execute_query_https(host, port, query): +def execute_query_https(host, port, query, version=None): url = f"https://{host}:{port}/?query={urllib.parse.quote(query)}" ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE + if version: + ctx.minimum_version = version + ctx.maximum_version = version request = urllib.request.Request(url) response = urllib.request.urlopen(request, context=ctx).read() return response.decode("utf-8") +def execute_query_https_unsupported(host, port, query, version=None): + try: + execute_query_https(host, port, query, version) + except Exception as e: + e_text = str(e) + if "NO_PROTOCOLS_AVAILABLE" in e_text: + return True + if "TLSV1_ALERT_PROTOCOL_VERSION" in e_text: + return True + raise + return False + + def execute_query_http(host, port, query): url = f"http://{host}:{port}/?query={urllib.parse.quote(query)}" @@ -84,6 +101,49 @@ def test_connections(): assert execute_query_https(server.ip_address, 8444, "SELECT 1") == "1\n" + warnings.filterwarnings("ignore", category=DeprecationWarning) + + assert execute_query_https_unsupported( + server.ip_address, 8445, "SELECT 1", 
version=ssl.TLSVersion.SSLv3 + ) + assert execute_query_https_unsupported( + server.ip_address, 8445, "SELECT 1", version=ssl.TLSVersion.TLSv1 + ) + assert execute_query_https_unsupported( + server.ip_address, 8445, "SELECT 1", version=ssl.TLSVersion.TLSv1_1 + ) + assert ( + execute_query_https( + server.ip_address, 8445, "SELECT 1", version=ssl.TLSVersion.TLSv1_2 + ) + == "1\n" + ) + assert ( + execute_query_https( + server.ip_address, 8445, "SELECT 1", version=ssl.TLSVersion.TLSv1_3 + ) + == "1\n" + ) + + assert execute_query_https_unsupported( + server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.SSLv3 + ) + assert execute_query_https_unsupported( + server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.TLSv1 + ) + assert execute_query_https_unsupported( + server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.TLSv1_1 + ) + assert execute_query_https_unsupported( + server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.TLSv1_2 + ) + assert ( + execute_query_https( + server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.TLSv1_3 + ) + == "1\n" + ) + data = "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007default\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\21ClickHouse client\024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" assert ( netcat(server.ip_address, 9100, bytearray(data, "latin-1")).find( diff --git a/tests/integration/test_config_reloader_interval/__init__.py b/tests/integration/test_config_reloader_interval/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_config_reloader_interval/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_config_reloader_interval/configs/config_reloader.xml 
b/tests/integration/test_config_reloader_interval/configs/config_reloader.xml new file mode 100644 index 00000000000..1dc9a59bd9d --- /dev/null +++ b/tests/integration/test_config_reloader_interval/configs/config_reloader.xml @@ -0,0 +1,4 @@ + + + 1000 + diff --git a/tests/integration/test_config_reloader_interval/test.py b/tests/integration/test_config_reloader_interval/test.py new file mode 100644 index 00000000000..22b66ecac30 --- /dev/null +++ b/tests/integration/test_config_reloader_interval/test.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 + +import pytest +import fnmatch + +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/config_reloader.xml"], +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_reload_config(start_cluster): + assert node.wait_for_log_line( + f"Config reload interval set to 1000ms", look_behind_lines=2000 + ) + + assert ( + node.query( + "SELECT value from system.server_settings where name = 'config_reload_interval_ms'" + ) + == "1000\n" + ) + node.replace_in_config( + "/etc/clickhouse-server/config.d/config_reloader.xml", + "1000", + "7777", + ) + + assert node.wait_for_log_line( + f"Config reload interval changed to 7777ms", look_behind_lines=2000 + ) + + assert ( + node.query( + "SELECT value from system.server_settings where name = 'config_reload_interval_ms'" + ) + == "7777\n" + ) diff --git a/tests/integration/test_config_xml_full/configs/config.d/error_log.xml b/tests/integration/test_config_xml_full/configs/config.d/error_log.xml new file mode 100644 index 00000000000..903d8699f5c --- /dev/null +++ b/tests/integration/test_config_xml_full/configs/config.d/error_log.xml @@ -0,0 +1,8 @@ + + + system + error_log
+ 7500 + 1000 +
+
diff --git a/tests/integration/test_config_xml_full/configs/config.xml b/tests/integration/test_config_xml_full/configs/config.xml index 628e1432350..61aa0a5c724 100644 --- a/tests/integration/test_config_xml_full/configs/config.xml +++ b/tests/integration/test_config_xml_full/configs/config.xml @@ -756,6 +756,14 @@ 1000
+ + + system + error_log
+ 7500 + 1000 +
+ + 300 + 1 + 128 + diff --git a/tests/integration/test_replicated_merge_tree_thread_schedule_timeouts/test.py b/tests/integration/test_replicated_merge_tree_thread_schedule_timeouts/test.py new file mode 100644 index 00000000000..515d9530424 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_thread_schedule_timeouts/test.py @@ -0,0 +1,68 @@ +import concurrent.futures + +import pytest +from helpers.cluster import ClickHouseCluster + + +MAX_THREADS = 60 + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", + macros={"cluster": "test-cluster", "replica": "node1"}, + main_configs=["configs/settings.xml"], + with_zookeeper=True, +) + + +def prepare_cluster(): + node1.query("DROP TABLE IF EXISTS test_threads_busy SYNC") + node1.query( + """ + CREATE TABLE test_threads_busy(d Date, i Int64, s String) ENGINE=MergeTree PARTITION BY toYYYYMMDD(d) ORDER BY d + """ + ) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def do_slow_select(): + # Do a bunch of slow queries that use a large number of threads to saturate max_thread_pool_size + # explicitly set max_threads as otherwise it's relative to the number of CPU cores + query = ( + "SELECT d, i, s, sleepEachRow(3) from test_threads_busy SETTINGS max_threads=40" + ) + node1.query(query) + + +def test_query_exception_on_thread_pool_full(started_cluster): + prepare_cluster() + # Generate some sample data so sleepEachRow in do_slow_select works + node1.query( + f"INSERT INTO test_threads_busy VALUES ('2024-01-01', 1, 'thread-test')" + ) + + futures = [] + errors = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_THREADS) as executor: + for _ in range(MAX_THREADS): + futures.append(executor.submit(do_slow_select)) + + for f in futures: + try: + f.result() + except Exception as err: + errors.append(str(err)) + assert len(errors) > 0, "Should be 'Cannot schedule a task' exceptions" 
+ assert all( + "Cannot schedule a task" in err for err in errors + ), "Query threads are stuck, or returned an unexpected error" diff --git a/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_list_no_proxy.xml b/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_list_no_proxy.xml new file mode 100644 index 00000000000..a1601153151 --- /dev/null +++ b/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_list_no_proxy.xml @@ -0,0 +1,9 @@ + + + not_important_host,, minio1 , + + http://proxy1 + http://proxy2 + + + diff --git a/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_remote_no_proxy.xml b/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_remote_no_proxy.xml new file mode 100644 index 00000000000..6c16a65b154 --- /dev/null +++ b/tests/integration/test_s3_table_function_with_http_proxy/configs/config.d/proxy_remote_no_proxy.xml @@ -0,0 +1,18 @@ + + + not_important_host,, minio1 , + + + + http://resolver:8080/hostname + http + 80 + 10 + + + + diff --git a/tests/integration/test_s3_table_function_with_http_proxy/test.py b/tests/integration/test_s3_table_function_with_http_proxy/test.py index 76ad2109efc..2ec73ecbef6 100644 --- a/tests/integration/test_s3_table_function_with_http_proxy/test.py +++ b/tests/integration/test_s3_table_function_with_http_proxy/test.py @@ -19,6 +19,14 @@ def cluster(): with_minio=True, ) + cluster.add_instance( + "remote_proxy_node_no_proxy", + main_configs=[ + "configs/config.d/proxy_remote_no_proxy.xml", + ], + with_minio=True, + ) + cluster.add_instance( "proxy_list_node", main_configs=[ @@ -27,6 +35,14 @@ def cluster(): with_minio=True, ) + cluster.add_instance( + "proxy_list_node_no_proxy", + main_configs=[ + "configs/config.d/proxy_list_no_proxy.xml", + ], + with_minio=True, + ) + cluster.add_instance( "env_node", with_minio=True, @@ -36,6 +52,16 @@ def cluster(): 
instance_env_variables=True, ) + cluster.add_instance( + "env_node_no_proxy", + with_minio=True, + env_variables={ + "http_proxy": "http://proxy1", + "no_proxy": "not_important_host,, minio1 ,", + }, + instance_env_variables=True, + ) + logging.info("Starting cluster...") cluster.start() logging.info("Cluster started") @@ -48,6 +74,24 @@ def cluster(): cluster.shutdown() +def test_s3_with_http_proxy_list_no_proxy(cluster): + proxy_util.simple_test_assert_no_proxy( + cluster, ["proxy1", "proxy2"], "http", "proxy_list_node_no_proxy" + ) + + +def test_s3_with_http_remote_proxy_no_proxy(cluster): + proxy_util.simple_test_assert_no_proxy( + cluster, ["proxy1"], "http", "remote_proxy_node_no_proxy" + ) + + +def test_s3_with_http_env_no_proxy(cluster): + proxy_util.simple_test_assert_no_proxy( + cluster, ["proxy1"], "http", "env_node_no_proxy" + ) + + def test_s3_with_http_proxy_list(cluster): proxy_util.simple_test(cluster, ["proxy1", "proxy2"], "http", "proxy_list_node") diff --git a/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_list_no_proxy.xml b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_list_no_proxy.xml new file mode 100644 index 00000000000..0a03986f839 --- /dev/null +++ b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_list_no_proxy.xml @@ -0,0 +1,13 @@ + + + not_important_host,, minio1 , + + http://proxy1 + http://proxy2 + + + https://proxy1 + https://proxy2 + + + diff --git a/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_remote_no_proxy.xml b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_remote_no_proxy.xml new file mode 100644 index 00000000000..943f2b36a34 --- /dev/null +++ b/tests/integration/test_s3_table_function_with_https_proxy/configs/config.d/proxy_remote_no_proxy.xml @@ -0,0 +1,18 @@ + + + not_important_host,, minio1 , + + + + http://resolver:8080/hostname + https + 443 + 10 + + 
+ + diff --git a/tests/integration/test_s3_table_function_with_https_proxy/test.py b/tests/integration/test_s3_table_function_with_https_proxy/test.py index 8b40b232742..54452dda401 100644 --- a/tests/integration/test_s3_table_function_with_https_proxy/test.py +++ b/tests/integration/test_s3_table_function_with_https_proxy/test.py @@ -23,6 +23,15 @@ def cluster(): minio_certs_dir="minio_certs", ) + cluster.add_instance( + "remote_proxy_node_no_proxy", + main_configs=[ + "configs/config.d/proxy_remote_no_proxy.xml", + "configs/config.d/ssl.xml", + ], + with_minio=True, + ) + cluster.add_instance( "proxy_list_node", main_configs=[ @@ -32,6 +41,15 @@ def cluster(): with_minio=True, ) + cluster.add_instance( + "proxy_list_node_no_proxy", + main_configs=[ + "configs/config.d/proxy_list_no_proxy.xml", + "configs/config.d/ssl.xml", + ], + with_minio=True, + ) + cluster.add_instance( "env_node", main_configs=[ @@ -44,6 +62,19 @@ def cluster(): instance_env_variables=True, ) + cluster.add_instance( + "env_node_no_proxy", + main_configs=[ + "configs/config.d/ssl.xml", + ], + with_minio=True, + env_variables={ + "https_proxy": "https://proxy1", + "no_proxy": "not_important_host,, minio1 ,", + }, + instance_env_variables=True, + ) + logging.info("Starting cluster...") cluster.start() logging.info("Cluster started") @@ -56,6 +87,24 @@ def cluster(): cluster.shutdown() +def test_s3_with_https_proxy_list_no_proxy(cluster): + proxy_util.simple_test_assert_no_proxy( + cluster, ["proxy1", "proxy2"], "https", "proxy_list_node_no_proxy" + ) + + +def test_s3_with_https_env_no_proxy(cluster): + proxy_util.simple_test_assert_no_proxy( + cluster, ["proxy1"], "https", "env_node_no_proxy" + ) + + +def test_s3_with_https_remote_no_proxy(cluster): + proxy_util.simple_test_assert_no_proxy( + cluster, ["proxy1"], "https", "remote_proxy_node_no_proxy" + ) + + def test_s3_with_https_proxy_list(cluster): proxy_util.simple_test(cluster, ["proxy1", "proxy2"], "https", "proxy_list_node") diff --git 
a/tests/integration/test_scheduler/configs/resources.xml b/tests/integration/test_scheduler/configs/resources.xml new file mode 100644 index 00000000000..197bf660500 --- /dev/null +++ b/tests/integration/test_scheduler/configs/resources.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/integration/test_scheduler/configs/resources.xml.default b/tests/integration/test_scheduler/configs/resources.xml.default new file mode 100644 index 00000000000..3b003a17557 --- /dev/null +++ b/tests/integration/test_scheduler/configs/resources.xml.default @@ -0,0 +1,76 @@ + + + + inflight_limit1000000 + priority + fifo0 + fair1 + fifo9 + fifo1 + fair90 + fifo + fifo + fifo9 + fifo9 + fifo9 + fifo9 + + + inflight_limit1000000 + priority + fifo0 + fair1 + fifo9 + fifo1 + fair90 + fifo + fifo + fifo9 + fifo9 + fifo9 + fifo9 + + + + + /prio/admin + /prio/admin + + + /prio/fair/prod + /prio/fair/prod + + + /prio/fair/dev + /prio/fair/dev + + + /prio/fair/dev + /prio/fair/dev + + + /prio/fair/sys/merges + /prio/fair/sys/merges + + + /prio/fair/sys/mutations + /prio/fair/sys/mutations + + + /prio/fair/prod_merges + /prio/fair/prod_merges + + + /prio/fair/prod_mutations + /prio/fair/prod_mutations + + + /prio/fair/dev_merges + /prio/fair/dev_merges + + + /prio/fair/dev_mutations + /prio/fair/dev_mutations + + + diff --git a/tests/integration/test_scheduler/configs/scheduler.xml b/tests/integration/test_scheduler/configs/scheduler.xml deleted file mode 100644 index 523ba1a5a98..00000000000 --- a/tests/integration/test_scheduler/configs/scheduler.xml +++ /dev/null @@ -1,62 +0,0 @@ - - - - - s3 - http://minio1:9001/root/data/ - minio - minio123 - 33554432 - 10 - 10 - network_read - network_write - - - - - -

diff --git a/tests/integration/test_scheduler/configs/storage_configuration.xml b/tests/integration/test_scheduler/configs/storage_configuration.xml new file mode 100644 index 00000000000..823a00a05de --- /dev/null +++ b/tests/integration/test_scheduler/configs/storage_configuration.xml @@ -0,0 +1,26 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + 33554432 + 10 + 10 + network_read + network_write + + + + + +
+ s3 +
+
+
+
+
+
diff --git a/tests/integration/test_scheduler/configs/workloads.xml b/tests/integration/test_scheduler/configs/workloads.xml new file mode 100644 index 00000000000..197bf660500 --- /dev/null +++ b/tests/integration/test_scheduler/configs/workloads.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/integration/test_scheduler/configs/workloads.xml.default b/tests/integration/test_scheduler/configs/workloads.xml.default new file mode 100644 index 00000000000..f010993335d --- /dev/null +++ b/tests/integration/test_scheduler/configs/workloads.xml.default @@ -0,0 +1,4 @@ + + sys_merges + sys_mutations + diff --git a/tests/integration/test_scheduler/test.py b/tests/integration/test_scheduler/test.py index e6def99c076..cde75c244e8 100644 --- a/tests/integration/test_scheduler/test.py +++ b/tests/integration/test_scheduler/test.py @@ -6,6 +6,7 @@ import time import threading import pytest +from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) @@ -13,7 +14,13 @@ cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", stay_alive=True, - main_configs=["configs/scheduler.xml"], + main_configs=[ + "configs/storage_configuration.xml", + "configs/resources.xml", + "configs/resources.xml.default", + "configs/workloads.xml", + "configs/workloads.xml.default", + ], with_minio=True, ) @@ -27,6 +34,41 @@ def start_cluster(): cluster.shutdown() +@pytest.fixture(scope="function", autouse=True) +def set_default_configs(): + node.exec_in_container( + [ + "bash", + "-c", + "cp /etc/clickhouse-server/config.d/resources.xml.default /etc/clickhouse-server/config.d/resources.xml", + ] + ) + node.exec_in_container( + [ + "bash", + "-c", + "cp /etc/clickhouse-server/config.d/workloads.xml.default /etc/clickhouse-server/config.d/workloads.xml", + ] + ) + node.query("system reload config") + yield + + +def update_workloads_config(**settings): + xml = "" + for name in settings: + xml += 
f"<{name}>{settings[name]}" + print(xml) + node.exec_in_container( + [ + "bash", + "-c", + f"echo '{xml}' > /etc/clickhouse-server/config.d/workloads.xml", + ] + ) + node.query("system reload config") + + def test_s3_disk(): node.query( f""" @@ -110,3 +152,302 @@ def test_s3_disk(): ) == "1\n" ) + + +def test_merge_workload(): + node.query( + f""" + drop table if exists data; + create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3'; + """ + ) + + reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/sys/merges'" + ).strip() + ) + writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/sys/merges'" + ).strip() + ) + + node.query(f"insert into data select * from numbers(1e4)") + node.query(f"insert into data select * from numbers(2e4)") + node.query(f"insert into data select * from numbers(3e4)") + node.query(f"optimize table data final") + + reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/sys/merges'" + ).strip() + ) + writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/sys/merges'" + ).strip() + ) + + assert reads_before < reads_after + assert writes_before < writes_after + + +def test_merge_workload_override(): + node.query( + f""" + drop table if exists prod_data; + drop table if exists dev_data; + create table prod_data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3', merge_workload='prod_merges'; + create table dev_data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3', merge_workload='dev_merges'; + """ + ) + + 
prod_reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/prod_merges'" + ).strip() + ) + prod_writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/prod_merges'" + ).strip() + ) + dev_reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/dev_merges'" + ).strip() + ) + dev_writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/dev_merges'" + ).strip() + ) + + node.query(f"insert into prod_data select * from numbers(1e4)") + node.query(f"insert into prod_data select * from numbers(2e4)") + node.query(f"insert into prod_data select * from numbers(3e4)") + node.query(f"insert into dev_data select * from numbers(1e4)") + node.query(f"insert into dev_data select * from numbers(2e4)") + node.query(f"insert into dev_data select * from numbers(3e4)") + node.query(f"optimize table prod_data final") + node.query(f"optimize table dev_data final") + + prod_reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/prod_merges'" + ).strip() + ) + prod_writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/prod_merges'" + ).strip() + ) + dev_reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/dev_merges'" + ).strip() + ) + dev_writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/dev_merges'" + ).strip() + ) + + assert prod_reads_before < prod_reads_after + assert prod_writes_before < prod_writes_after + assert dev_reads_before < 
dev_reads_after + assert dev_writes_before < dev_writes_after + + +def test_mutate_workload(): + node.query( + f""" + drop table if exists data; + create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3'; + """ + ) + + node.query(f"insert into data select * from numbers(1e4)") + node.query(f"optimize table data final") + + reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/sys/mutations'" + ).strip() + ) + writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/sys/mutations'" + ).strip() + ) + + node.query(f"alter table data update key = 1 where key = 42") + node.query(f"optimize table data final") + + reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/sys/mutations'" + ).strip() + ) + writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/sys/mutations'" + ).strip() + ) + + assert reads_before < reads_after + assert writes_before < writes_after + + +def test_mutation_workload_override(): + node.query( + f""" + drop table if exists prod_data; + drop table if exists dev_data; + create table prod_data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3', mutation_workload='prod_mutations'; + create table dev_data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3', mutation_workload='dev_mutations'; + """ + ) + + node.query(f"insert into prod_data select * from numbers(1e4)") + node.query(f"optimize table prod_data final") + node.query(f"insert into dev_data select * from numbers(1e4)") + node.query(f"optimize table dev_data 
final") + + prod_reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/prod_mutations'" + ).strip() + ) + prod_writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/prod_mutations'" + ).strip() + ) + dev_reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/dev_mutations'" + ).strip() + ) + dev_writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/dev_mutations'" + ).strip() + ) + + node.query(f"alter table prod_data update key = 1 where key = 42") + node.query(f"optimize table prod_data final") + node.query(f"alter table dev_data update key = 1 where key = 42") + node.query(f"optimize table dev_data final") + + prod_reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/prod_mutations'" + ).strip() + ) + prod_writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/prod_mutations'" + ).strip() + ) + dev_reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/dev_mutations'" + ).strip() + ) + dev_writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/dev_mutations'" + ).strip() + ) + + assert prod_reads_before < prod_reads_after + assert prod_writes_before < prod_writes_after + assert dev_reads_before < dev_reads_after + assert dev_writes_before < dev_writes_after + + +def test_merge_workload_change(): + node.query( + f""" + drop table if exists data; + create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by 
tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3'; + """ + ) + + for env in ["prod", "dev"]: + update_workloads_config(merge_workload=f"{env}_merges") + + reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/{env}_merges'" + ).strip() + ) + writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/{env}_merges'" + ).strip() + ) + + node.query(f"insert into data select * from numbers(1e4)") + node.query(f"insert into data select * from numbers(2e4)") + node.query(f"insert into data select * from numbers(3e4)") + node.query(f"optimize table data final") + + reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/{env}_merges'" + ).strip() + ) + writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/{env}_merges'" + ).strip() + ) + + assert reads_before < reads_after + assert writes_before < writes_after + + +def test_mutation_workload_change(): + node.query( + f""" + drop table if exists data; + create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, storage_policy='s3'; + """ + ) + + for env in ["prod", "dev"]: + update_workloads_config(mutation_workload=f"{env}_mutations") + + node.query(f"insert into data select * from numbers(1e4)") + node.query(f"optimize table data final") + + reads_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/{env}_mutations'" + ).strip() + ) + writes_before = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/{env}_mutations'" + ).strip() + ) + + node.query(f"alter table data update key = 1 where 
key = 42") + node.query(f"optimize table data final") + + reads_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/fair/{env}_mutations'" + ).strip() + ) + writes_after = int( + node.query( + f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/fair/{env}_mutations'" + ).strip() + ) + + assert reads_before < reads_after + assert writes_before < writes_after diff --git a/tests/integration/test_ssl_cert_authentication/certs/ca-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/ca-cert.pem index a64cd623750..d1e4a3a88d9 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/ca-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/ca-cert.pem @@ -1,32 +1,32 @@ -----BEGIN CERTIFICATE----- -MIIFhTCCA22gAwIBAgIUQOHzlr+pa+RiBlRROQnQgfkDRUMwDQYJKoZIhvcNAQEL +MIIFhTCCA22gAwIBAgIUZmPYBB6vdp8uxKlJcS8mI0SArqQwDQYJKoZIhvcNAQEL BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM -GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjIwODA4 -MTcwNTQwWhcNMzIwODA1MTcwNTQwWjBSMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwNjI2 +MTAyNTAwWhcNMzQwNjI0MTAyNTAwWjBSMQswCQYDVQQGEwJSVTETMBEGA1UECAwK U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMQsw -CQYDVQQDDAJjYTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALgd5Tta -+M/XAGMhdi44sKEp7f81kTwqUib4mkHj9Nyp0MKMo9KodV+t60Fej/L0D9Fm1N3V -Q+CC4numWWyaBK6hFdfkU5wvbzZnBx8KozMoeiPLPes6QOl9VWkq3lpBDLAcEZM6 -LMsvNy1jsUdQ06lLy6j1lH9dgIe77qNf9UxZhtCJ4ZslogI8oY3q6D/UI1oCpx5L -J42OyOoCGbTFwzwtHhMw5WKI0EHXEOWwXubdM7P3ETxTjp2+vYPSXj5Irq5XVyfd -tkCJ7GWouB9x5vYa9Y6DC0J/cgOsdrLBbvLGMTubbFjO87musc1DCb9Svpu/IQDP -PawdFTn4ASPny3vt0TMRa/O8nBkrIW84O820ddXZhBb0tNT/q+ftffec2rOas7U4 -kE7YzbQthk9otBqvPX+VeCjFJ5Kx3KTOiPGc/eyWHoJbjvwNrnFuEWIiN/1TqhCc -Y3Qq8cud601yu3dBtCrNAEDCclfnRO0wdKNgkP3u2/lWY4xvMyJfxhNsb/R4R8Z6 
-VVYQ4vJXoON0GGYs9D2KURlo+bMIz9hLtZLshK+voOetmRhUmYKa1gE3UxWLmJQM -/p8A7Zihr4OMv5ioH/hnXvVcSJj9VcsPMF3Z/RhllvOxN4TBLyZ0hW55oEz3B4Bn -IbA8LcRJUwfQTKtrVHyt07NLbQm0Kp7YYO8FAgMBAAGjUzBRMB0GA1UdDgQWBBRP -/aq+8kGTfMRAmPgrNaaEHkdKEzAfBgNVHSMEGDAWgBRP/aq+8kGTfMRAmPgrNaaE -HkdKEzAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQCycqlIfQMF -bxjczVV+ootUWeiD9Qo8w3VosQtR6PYxDkFZGgP2/aD3wIRNSBpmRfnHCdv1sURB -XkKpqG3E22NRDLHwjmXjI8y5BkUxX1xwL9WX5pe9yZTn3eZu6lPtXrhcPnuMvsxs -X+aP0Y30iMEpHTxLlvV4yNso7ZucXaunWfQTKV18FuXROPgwwnbcZscCQhTxOA+d -JrB+8WD3BwwzmJU8Whmojmt4pYhzS7q47OIfkxIGugtZbtSxAWXBMGrV0KJkipr+ -omYUV1qCbYXZgh+h2+JtNdBpIZ3K+i3esXT14N9e72oysmoYFQN4Qol8lMgZ3Jdr -cTH3m1zg1fOne7KT30XKyfTr4iYoph9WHrcv1XwxwYzPbMI+HdMJX2M30V3HEMRY -RLoafxUZNkFdpWcx29Dok1AI/aVU4vE4+32YdctSJNVPrT/V+Y3dX8skt8tgrnbg -JnrFCpEuVhkNiwgTS6ktH2ecdpY2VqesUENJtw+m85cCBjxd8XYhRNoFBPQp8SAv -hEeGc+hIjXYffy6AUo9p+45uOU+RBPKH4hSleESkrI7duajEXaPPl/wJeQYhqvWp -imbAJtqwI6aCf78NOlbzWiTWJt3g+4kla6oJTInGAdYHcOwwY1KwMWKtSO2ARHjM -wCCDUCIbtJgxTrUk1Kgty5ipLgP64m29Pw== +CQYDVQQDDAJjYTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALHzp+SR +T8EtrMBYNlgwIUGMZaXomQPwsjOpjt3RUsdE3LCQ15rLuyBwZ0SbLMDOazCDA7Xr ++AXHDYwg/PCJhe2N4NTzfgnWqbkIGwYLhys95Xxq+q+kV7Csk/27JSk1rsK3Nru/ +Iuj/NWgBkAQC8n10qfbJSXHcXm9wDVy/L8t7DXwxkX70LToGWsb6QW6VkiWe/QOW +QYTMVncqjrtLTCTnqGIk0s6ZIifgPLfaYJxtwK+LdIoJioSbAuVfHmVTe10vjonO +YqJ56KfqOn/G7dIf1l9gOLFBAe4jUf3RS1wkQMdk+oEGLbGnShgW+BQMfB48RkrQ +486h7slzwC29jBJLRARI2Oc9p8/zBDVph0pxkjVGka8dfIkZbmTD932h/1gfMgQl +F20G/H5FF1jk37bDcsczns0c24S1F2uJbzOlHjFLhqmH1IaVCWsYawfBs9khModW +VS6+WAv//cqWE3KmmJ2EdtAmzMCJzAQUEyrMZWmrFrBzpACyMq2zFEtyIXsCXpgq +eW4odxIIZlClibo1FGrflqN+hXnAhRyCj7WJBQ0ZPrEdRMRpBYhYdmygPJ+lWfsg +HOtNnshSuJTXGJTtsuJVr4Ioeq6tKfWGofRu4vvT6cILZjbp9VaxxfVLS7bTsjWR +c+5xHp+KbcjSfw2cJHQN85hlWTpMe9jPhgt/AgMBAAGjUzBRMB0GA1UdDgQWBBSJ +Kj1dSYN0jW+Mu0xFQrobZFmQpTAfBgNVHSMEGDAWgBSJKj1dSYN0jW+Mu0xFQrob +ZFmQpTAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQCEm/epjNWe 
+w0A+1M3i0y8adrXWdTeRXuP3Klgtjh3NGFKNm9WGnOAufsqUIpVwKLw1uYNIhiEA +fj5KuD7YIVU9CrWx4Z/+lILUZhyn/br5mACtRiRTF/75/QwLcDz5z7K9MyMzdL99 +DLQ9bd3JCuEgVj6zacPrALwWVhM8u9fNGJxdQANnIC8yTY5+ZE59/fn7UI787JuR +4njOGWSVnDavbTyJCPMPiUkgwqL+QSWBcNbGAPzMaAblvc1SL2Lj/ikFDAETAZs2 +T/3ZqBqHEOuVhFQYTAvMAdMQX3w8bYv/CGL8++W+qHazY+uqPypd9CLnICbnkZmr +P+id9WleGl2F//u1CQ+YA2Q3EazSFhwRLA7IKIVCrYVaBsbe/bpxxZb6+AQVfM/i ++7+fCbr7A5HDe9Fi4dClv6xPI0GZZkarhQPsoLPaDQeqM4OE+K6oPSHJnqfAB8v3 +NgTt1QuVnKhwveX5DDEP4t/Qt4j2n7AFpeoZDEA8aM33K0noXNrwqHz3991O1RWz +t/gd+cFG/Z1jRP8kYtfAV8go2nzt8QvqBhfIcNnMwD8cwuKJ5G7SdqLvDFj3XCCO +YqQAALl4QFs046eVDEWLajSESmj4fCaTmO05sHHLy7U5asoAo/MWGbmGmL+8ExUX +sPO9r12lPJ7IThJ13PSqbJIJnnloL/XCxA== -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/ca-cert.srl b/tests/integration/test_ssl_cert_authentication/certs/ca-cert.srl index c02cd0a4526..cf47b0dc79c 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/ca-cert.srl +++ b/tests/integration/test_ssl_cert_authentication/certs/ca-cert.srl @@ -1 +1 @@ -05F10C67567FE30795D77AF2540F6AC8D4CF2461 +05F10C67567FE30795D77AF2540F6AC8D4CF2470 diff --git a/tests/integration/test_ssl_cert_authentication/certs/ca-key.pem b/tests/integration/test_ssl_cert_authentication/certs/ca-key.pem index 26616a084fb..2dea2ccd837 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/ca-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/ca-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQC4HeU7WvjP1wBj -IXYuOLChKe3/NZE8KlIm+JpB4/TcqdDCjKPSqHVfretBXo/y9A/RZtTd1UPgguJ7 -pllsmgSuoRXX5FOcL282ZwcfCqMzKHojyz3rOkDpfVVpKt5aQQywHBGTOizLLzct -Y7FHUNOpS8uo9ZR/XYCHu+6jX/VMWYbQieGbJaICPKGN6ug/1CNaAqceSyeNjsjq -Ahm0xcM8LR4TMOViiNBB1xDlsF7m3TOz9xE8U46dvr2D0l4+SK6uV1cn3bZAiexl -qLgfceb2GvWOgwtCf3IDrHaywW7yxjE7m2xYzvO5rrHNQwm/Ur6bvyEAzz2sHRU5 -+AEj58t77dEzEWvzvJwZKyFvODvNtHXV2YQW9LTU/6vn7X33nNqzmrO1OJBO2M20 
-LYZPaLQarz1/lXgoxSeSsdykzojxnP3slh6CW478Da5xbhFiIjf9U6oQnGN0KvHL -netNcrt3QbQqzQBAwnJX50TtMHSjYJD97tv5VmOMbzMiX8YTbG/0eEfGelVWEOLy -V6DjdBhmLPQ9ilEZaPmzCM/YS7WS7ISvr6DnrZkYVJmCmtYBN1MVi5iUDP6fAO2Y -oa+DjL+YqB/4Z171XEiY/VXLDzBd2f0YZZbzsTeEwS8mdIVueaBM9weAZyGwPC3E -SVMH0Eyra1R8rdOzS20JtCqe2GDvBQIDAQABAoICACUNdyVz0GDbbuuEP2lb5Uf5 -zTiwChNmnPPU0g5ylsOVtTqB3TI2zrA7qv2TzzpgnZJRtIMUOT3EeVnTB2oQsOKH -7oJtI9ppvWLgZxF7LY7NFY+AhmyBY7DZlUe95dOaBbinXW4Ypg0Z6mvLBVFJ1d7b -LxhIm+Lp6UTnsEXWuOHtnnD3Xpy/fRQgKY8K+ERoJ9kkcYdqLoKReqOFWrEDn5/L -oKLSE0e6KtclNfRTgzXIzG/qR+CxHCFo0nDl9CtqqyTQ1Oxj9RqV5yoqQyuo24cp -s8NDYg+24DYXDig5P1/CPQQ26h4RQAFdQ9EALcUAHPXxB+5Gxq2NiMkKvQgBzPRC -92yD8IDarjwizeP5hKCOVCAyspHxtOFgMS8LpHQ4ul++sOPSYV8pL+v0dOhs0aky -96PZyZWsvnN/CVNXuuTAi0RvyUDTGhxt4mc9TUe8+XqFC9P1BHaCyg+jxbEjJCWR -YnJfBiL0auXSL6R/RZ+LSRQtS5RUMNKESYNHFYVik/mQHZoy/1W4pOoe25fnQ8f9 -/80IIzsGq7RDVHgsbPCKgwN7dpDMhTmWcjrC31+oOTWFmIkDkp+dS37ErO6f9kLF -e5syBKTvbV2yscKCTWXkoE9kgCk0TVlwysWKUQtRV3JKsy//YLH/PfjfH/PdWBvv -CXRmZJTH7+Ua5RIEGoT9AoIBAQDD+GMnfYkSOPZPNv6BA3AW7PvX3lwCE5eRorsD -G1hBfYbh582/yG7pRvSTxdxH400jwCU7Et2wlxj4ClZxOyBoeaOi+8wxZB4xFpGG -e7OhGaQJ2Vd4LWqjdb178Qk2a4q8oOsciHoQQzJhTZhd+iiJLpnTIDTR13OVJKeF -Xi4FFk3GPOq7TpugMdQXZ1XB7rh5t9lDjeYRVj0fDU5amBONPKcVYO8lIZElsZjQ -EzfxAeYEJxN+SaNkUUGew3QDZI/caF0nji4WcuEH2LracshoQaCl5MLaPJpMJIOv -aCa442AO1vpRWNyk9dZDjrm6+MjkR1gYPTSaaNll3lmqr2AvAoIBAQDwg/sJOFWa -Hq4lMD5MuX/u7We5HcT1QOzlVsXBEiGiUNcQUuQ+leIZRqJSosEpgnuVS33sbc4r -fDngZJLItwezQ7A/1LqzJVgHb0Qp4Sq0ak905ibjpRd19y9E3tx47KqNzziYcp0M -8t+gOBH5tDWHk8fjcujMuwRaQoHoJvDVl7jSoPhYVh8BIS1lKvYz5/hk/2yG43AF -MIjbUvC4b3yMiPXmvoUKjZNrAM/5f3E4A7SW1aCLEJ1Humepd3pLUrZsma6qq9x8 -lb2MQeW3UG/Yrfo1NquocX9KjHawGe3vTrPWoi0FQMYVHPKQB0oeLd/dkveYOl8B -DPEcUEBDrMMLAoIBAQC0fvCEjOFVAVYLu/FJTqtM0lVbjBheaUEvCB0LTYJKP52e -MGyW+br97TaTbKfpJngEN5OTB1tcrK54trzLadP9phuLoDOAQmB0gf7jSMg312/5 -Ck/ABsSzbVxq7aS4lPChr+0cyK70j4+1g0yVMjVntJ5FkOJVbAWvFqWc9xOX5+UW -c0EX59z6/SoFrE2WY5NRuH3J9MY0raN6GD1Uv8EyUuzpifB52KLhfuPqsejSeBct 
-N/iCVrzBRv4Thp4yCctfDFjQd+oHDyQon/vp+9KOA/Q9F6hIbY0IGJGZW2pe3D27 -LVI83oXLAgqpCqoShdYyO2vuV2E93thtNRCNXx5NAoIBAE3/Lvi1eSUqz+SleX6e -JXvRJ3Lj0YyLSoexFR5gh+HAf43+o6AcMR9rRZawyx1wAC3RNnvmvBZkCczYMLXA -jVG4IL8CeK1B7gMzNRKzv4qUc0IApRnr3ujViVG3SB46+bBVRBBEEuQxGw41QLcO -ltFpvkfnatGB0I6IxOIJRs6tjjVYGFFlVFakcLk/Lmp0zpvKLWKs/RXhwHAHvKLD -HMviWoRCwphCg00PDWLmzkuRAA5uJssSTz8Elztg0Jr+rsQXLoqQg0cvtDF46UsC -XdMR0HNTUGWmsNX3KUJAlmWlyzJOk3UBpXsRUSQeCQ4yaEfNsld+jnKjxMkeyUhp -DTMCggEBAJs4Ng8C7Pp3q/cP8/lRaei89NueSINNXKKxzuKNg6iy6w9kOqPZef6+ -MoAe14I0UX3051mUcz1RqJU34iOC1p2CjjfM7eT8Ue9TULHeMqc1wj6xseSqZYWM -3R41p96lum179pC+hRGRl2l418xx1/bo079sB8yS/qsUiDyBIBkASgiulXkYDBER -T7L99aqXHrBlEu1S1THcYt8GOqOUuYZ7+RK9i9+Irk58zN/Pkucj0QMHN4WVInYA -kepOlUn0KlUlyn8g9Ii7gZKHFKbv4F5QIdXE5tXX88sRtqHG7GYMFa7kZMGG6bIX -7MqACWG/2mFExJnaXtgA2HS8NYPKUcQ= +MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQCx86fkkU/BLazA +WDZYMCFBjGWl6JkD8LIzqY7d0VLHRNywkNeay7sgcGdEmyzAzmswgwO16/gFxw2M +IPzwiYXtjeDU834J1qm5CBsGC4crPeV8avqvpFewrJP9uyUpNa7Ctza7vyLo/zVo +AZAEAvJ9dKn2yUlx3F5vcA1cvy/Lew18MZF+9C06BlrG+kFulZIlnv0DlkGEzFZ3 +Ko67S0wk56hiJNLOmSIn4Dy32mCcbcCvi3SKCYqEmwLlXx5lU3tdL46JzmKieein +6jp/xu3SH9ZfYDixQQHuI1H90UtcJEDHZPqBBi2xp0oYFvgUDHwePEZK0OPOoe7J +c8AtvYwSS0QESNjnPafP8wQ1aYdKcZI1RpGvHXyJGW5kw/d9of9YHzIEJRdtBvx+ +RRdY5N+2w3LHM57NHNuEtRdriW8zpR4xS4aph9SGlQlrGGsHwbPZITKHVlUuvlgL +//3KlhNyppidhHbQJszAicwEFBMqzGVpqxawc6QAsjKtsxRLciF7Al6YKnluKHcS +CGZQpYm6NRRq35ajfoV5wIUcgo+1iQUNGT6xHUTEaQWIWHZsoDyfpVn7IBzrTZ7I +UriU1xiU7bLiVa+CKHqurSn1hqH0buL70+nCC2Y26fVWscX1S0u207I1kXPucR6f +im3I0n8NnCR0DfOYZVk6THvYz4YLfwIDAQABAoICABZRI14j5yen7cFVjsMyjgkl +bV4INKBs4DxgaF1jMglxlmfCUnfEUxx3XEwbVdp8SK8VzzJSfJFk7EsFnBMifBxV +rbunKchcFn7xCEqSyYnfwlb/J589cg3jJtAsVzW62MbsqT2Uc/FaiD0Z7RDDuduH +9QTRK5fO9jzthY97HqhbL07C/Kc6Qi3DvEC2A9y1f1WegcagYmkgIzvgp3PPtqXu +M4zTZ2inhcQQeCzqgzE7Bm49hAkHt0p4Ej3n1u0IMjF2lF6t9mq/9TCRzHJX5V1z +xrPBYnrAV1ihL1gwlk3g8grPnCbwOmzMORuaTdRd2HcGQh6B4f/5CPRUwuY5nkY7 
+UMcX4rbRCcBDzG8s/NllTRaVC6yvEJPN4B/zOek6DI+tRy4tRj+BZO/1771bindD +nsYAklxbbpTiS7B073b+as3RFZW0xvmoqLyhzRQNW6GqSGj4C0KVigkWQ7Br69b7 +O2++oEurmLqI5Hm3RsJeJJ9obG4vKhPUPdtStF/W0W2TO2i0gpTOm+NeO5uYBRB1 +6OvhJH9dzMi+a0ekCpdQeD2xG4NLzwSe62/Ozz9Oi0rpAqHHhu+SvF+WEepLbkyO +2zx/OYpFK47idBRCAlHLC/9UyXpvw2yU9sleElVuHM41CzMe8Pwj3Qk0YdiHHDzS +Y19XEVHh/riXUufsAHVhAoIBAQDpUe+UJLgAIT6boz+MPLrPIGLeazCySEHEsdnp +jGuAx0M59PIDn5OeXQpMqCIRFDw6zhA/4gji0UegFRPIGA3Qduq+GsjVYRt6SHLC +N/rBRi2xg77yyKOMxv/+nwKFh/3TKIQbUc9EQj63WGBGCHu/EyMV7i9V0j8e7D2u +v/Z23nV/+XqawJXi4u2bHB3M/upOoclKfb3ewZBVouajzZc92kNC8XYfPn10Eofu +Pz7jxDX10IJNmzIYOozO9mlBsds7nFIBXS5wMz3iVg3GEUB05pPEKoEtZGrw474u +0M+gW9d7PV3qYdFgjSogiQf4JrSrOwXJQL/26nyfRX9QVplxAoIBAQDDP+fFT7Zl +eFLvouZu73lr++bV1+LogHgX+GHCSIujBELPyFEAyAeElFKFqnJ/NEOuPLG9X7tL +PDhh9NUijcBTPhVvwbH2/CRBdSX7Yf6RHh5fY+2Ik3hTF81L4bQcf0fgyX4roJY9 +YqpjQWFYGmSk4niCqWd+re/ZrYx/zpF+qgN21v37BAZNOCI+KidFScrq29p8kpLj +MdBWa0m7bhJcv4MPO46s2EJZVdczBU7iK86v5NVrGz7fPVm+tGxEDpwhyfYiu961 +U05XzT+trAaBa88KlSKdmUFq3qDdC4bFb6D+Ra4g+mhqNGtfBYXsavnENZgt0N99 +9M/dgaAEa/vvAoIBAQCm4YcFo9nDpgOc2H/Mc2d+XIC661oyOkJoSHk/dcMyodNw +scUkWFACdjO2ro9nPdzyho7S0n5elSew1UKH3MSMtXGjNY8uJ726kfUa+2UsNmDa +VgwOpPlt6KwTV3I7RhCDprgOvk4MWYF4LAr4LHsuKKbwuaM7tByXpotb4UuMrALI +3Q0XgOX0GTGvvsWF6VJ3mXpbAGL839+3kMN8p8EkaWewivpc0Jp0mgiFnNEDokSi +JFf+4CFNeRtbsJ2KcocHNQDmntpnQA9kQv6pC4/ZzU4lge1RJUDkOVC/NXU8ElSm +fjcdPIfAklduW/TKRgz1aEr0Lo7fMcqfNNsiAD7RAoIBAQCaeplZ13OsXMLhrrU6 +2GXtNeSxFJoG8n4SGQbfvJ4eYGSsGQVd5OVt1BxmfTERy7wwwvytpGx/XioN9rQb +HqQoOFqljU7M5zmYQKPIfQP4tSe6uUlaYbM1qwNXIkBqu5mXFFSrF+dGsiW1Wik2 +l8tBWZ2XY4jrBZtbUqBzDnC3ErSi9f8E92408lDFdnyTqYrOvxviq+VjtCnt9fzk +OnZ0w2FiT/DWeFQmcnBNgcmj0J07NYZVs7zOy6+R3xY50oVdhnkjihjuxfaaKV5U +fmK3SyEIcm5s2rCTaYlE2rXKyENMar0WgojSXp8FE02efBUZVH4O4c+xzFwaGVEN +rpIpAoIBAQDnAmhVy85n9oX6Bi+bXg9aZFa/teiCX33lCjNvNeJ5GljGqO0X6423 +6FVg91BYvnCbHQ0frqyKimVxNO/XYxCnid+Or48Cb9LwD31Wyip3UZABljg5sTb0 +fiNK0pxx6G8x1MsX0u97LogGwd5bMuff2lMi4xpinkz6ydB+fYsmM0UXGKsNkB/d 
+zR1izlqm87TMeQVi+pVZuOLmg/+hXVgISI2M7TlSoytODmpnSg5SZbaT7ut1IIwm +hJdWMTPHLv0X7NwsvhV4Knu27BJBw4470+okrOLfDuZ9+LZ6JHvYg22MywjCJx9s +MJuwAZJiZb+dQc0uwMkAEPMEfOQ1BI1+ -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client1-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/client1-cert.pem index 3a953f1b941..068855d96ea 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client1-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client1-cert.pem @@ -1,30 +1,30 @@ -----BEGIN CERTIFICATE----- -MIIFMDCCAxgCFC1il5+r7ZqfWcm+w5gJP/pffeI1MA0GCSqGSIb3DQEBCwUAMFIx +MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRtMA0GCSqGSIb3DQEBCwUAMFIx CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl -cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTIyMDgwODE3MDU0 -OVoXDTMyMDgwNTE3MDU0OVowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTI0MDYyNjEwMjUw +NFoXDTM0MDYyNDEwMjUwNFowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE -AwwHY2xpZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAMGHbRBS -+W5wgB0Nv66ODxB8lU1xCj/4imLZPZcLzPdtL0fR1TS+G8KjGSrxznEph4NbnFR7 -cE6fKtuC/OSSUMcBeRiXZxjW1+uYjGp6HGJYuyaeVKMrhP2W7BD9GuMu1l0k6+6K -R8gGCW+09ROH8Uy6rvCI+aGl6pgwabKzYWIu04EgIsdCgj9aRpkyxoTwFdriDJng -bTutXrPCC+LaozYJBD4tnwBzSfpMlP+1rEPc/Mpt4beDyLA9vp15uVYQNaJKopvu -CoflqLE04QhcMdubsUjQSikutz5x/chElyWG8w/5kjAKZmdwJLa/yxg3NsCoPdmS -j4aVwQwK0seY5o6yUpdADc0W2BzQ8veuQoSX/rWsf/lWQe2VG7aEn84TSJoUWwgX -LIRQuFdyjE13w0VdprJo6Z07Yuuo3cJ9dnnmh/LOZL84tLC4o4qSsnb5UjjS7HWC -IDVtsvz61tKyApJZ1IvsrEshj602whrIDTCZ6jMhpBLpZIj8GRxSRKxpC+Nqu5Zp -sovCNWbpYAkBns7svwEZaRfKY1Sm6bbgcZk1VaMKUIPHqUjndVkKjZ7SeptXCf2K -v18xEaXDLerg99IhRzUlNvv/MKrG1Y9ukO2xb7UBvFPzkeiL09MHTR1bZcvowiSO -3IlBvCixjuHur5UtsGX4wszpSrhtaCYqZG37AgMBAAEwDQYJKoZIhvcNAQELBQAD -ggIBAFsyHyCiYGXGB1M/dSqnsXm4t7VtnaAKJxNYGtx4pv23jdgU56QgLpCXU6k/ 
-tOE0uNLTbIuDPRGPAPNlyazFG7Dk6OuEQZ6rTGpcgvgJMZisLynSUhSC1AO+10F+ -w/84EjzF11SrU/OuOh8UcKdNBQVJUu5MU3BVzoK6h+g5iG95KDAPpk/7yw46WMJJ -HI7PR2H2Xu8/7I9LyRl1kqMvzOEvLX0bLlQw5HZ8H/kEgxSOtUG8BqmQgxHL/EwI -kfYu7X7t/f9oqzi/AlFWtBBQg/SPDPpsf7uyxfcjduCiDtNxjAa7OId40WHrLoCD -5NqU8ssphuCKf9kxFp16SB8tkVjo7wJoWCgr8HHqArdOVpS+RNB3fjwvnDt+JXM6 -Xi6Ui6WBrTp7T/VQS2jMz04BbpVLnJcQXX1ri+zqQfM4KFsroZWTz/+WMdSD/tHS -6vnfDUKFEvN7GdN0hpV94r+YinbC9UTgRC7V6prrao24mU4EjaHjQJ+c6tymNzye -azPSoqJiYhDdFq/txxNp+OusBshz5sAl9yJye5vvvdCsClG/6USWVfcixBw9vhcp -m6LmgZd6Gc4cROHG5kGQNwPG8IHfr9hljGQGxnH2lvcRt8t/hEhP3NX+G4n4ihKx -g63Iv+ZMUHnHLs6qfQo7ll8150IrLTXubEKnH6M70/75j4wq +AwwHY2xpZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALaJgDgT +PPqKnBNHrxu/jVwMpfPa9ENvaHbtSi88jQrvu2O+nRV/EoOBb/ol3pQn7RveMyj0 +PV6Io3KzmkMQRcX2Z5H0BNPAM0WijXpzfxWSdS9yZ/BKbpyq3QfHPX9FTAcnnn0S +gBgKUDfSxnQt4tn32kNv09q8nk0H1JXzjxILwKAhllHweJlyVBt1a6AG/yNe9K+9 +atyhYgwuQHv7z0OdbF8NfBWhWODLPkZ4Lj4LktVakYSi5XS5sdTtYXwi8mEDkucZ +1CrlZswIzR44nlmkJVoZssMbGphLzxWw/XiDn2ZtO4QNU1cJYU/WbJiMY+SUjIyr +AsLf2GAbU5bUiluG+XZsYKUjvrzVV6gYHWncXPDVWvjz96PDRsc6M19rsrhMT9N1 +tMgydCPqpxJkweC8/IRt7GZhTlxQNL3ioFsjksPmvm112012XYHd+NiuVIZucY6P +c0dFRWi0VKZUjvLVRbtxWMlSawTk0S/C6sbL2r556GwxJTwkm+EIuK6nGDKg7Kmw +yLFlSyqtXkvUOnnAnIOzEH3VdjUyUniUbfFT4ODs6TLzIkFSSJDN7W4klP6p1Ot1 +ZUkB030FYpFt1r39AfWLPWLjwzKvMWenWaTSpZIRO3h8sXbh6gt7zVZKNMrf8AFJ +uyOnfYaQpUwrxvWvuJdWZETS7lFgoRrJxGDvAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggIBAJ8XAILsGKVXyCBMazuq3k2rup8kkNU/hVg1RuYGmy4bNClNN6havzu/S1SO +/g00+tlXGBlSiPlRXq/p/oZgOI/efqIWSBfcmHuR9STdELfa7zugdFpscgaTOhTq +Ko5o1B81pZKw6wzVfn2JlGxnEy9e+lCC7ptMdFiBqc7SGrapkjCjLReszn1Jctk/ +9lsSvhWZ/7GhvRO/L93X3/ZM51K7VZxEwFnibULApObDZQBFipYdfKlrrVrqtdj2 +M7Plx2Hh+Ivt16Kj/DqRcRLcWVlaM8rp4QAtjn4bDYArFEGGi8ElWFRNjs5ztE12 +f0Iu+yqGmvDn0lHEocNf8fgxHIN1uJ2sYImS11Yn7xHp5FPb7efvYh8Ih6voCaTg +NojHi61q26YIU112A1ylStV4xMKgxt2rqRvmc6UTnWDtzNO9jp3NscQVHtUEJpv2 +Jd+JsDf1c/w42KTwTyOAz5j+D0acRmw1YRsv2BpO5tcly8nvdMX9k7JesdiQL9bx 
+ik863yOLG0AOMdWtZPVG1BAuiUUlbBS2RRUp3qsP4OuJ+eVKlXFieX+2NuzqyddV +CywaA+R05nutX5R34h3Cm2MmQOERAk9FUeHFX7cZMAXQRcmoBZKtUfKOGUKF0zOT +ZEs7xmHSqAOTx8ufDU26pnbyCxOBYwn1DVX9nYSskMGMSfGU -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client1-key.pem b/tests/integration/test_ssl_cert_authentication/certs/client1-key.pem index 767afd6e2c9..8d9b887b033 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client1-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client1-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQDBh20QUvlucIAd -Db+ujg8QfJVNcQo/+Ipi2T2XC8z3bS9H0dU0vhvCoxkq8c5xKYeDW5xUe3BOnyrb -gvzkklDHAXkYl2cY1tfrmIxqehxiWLsmnlSjK4T9luwQ/RrjLtZdJOvuikfIBglv -tPUTh/FMuq7wiPmhpeqYMGmys2FiLtOBICLHQoI/WkaZMsaE8BXa4gyZ4G07rV6z -wgvi2qM2CQQ+LZ8Ac0n6TJT/taxD3PzKbeG3g8iwPb6deblWEDWiSqKb7gqH5aix -NOEIXDHbm7FI0EopLrc+cf3IRJclhvMP+ZIwCmZncCS2v8sYNzbAqD3Zko+GlcEM -CtLHmOaOslKXQA3NFtgc0PL3rkKEl/61rH/5VkHtlRu2hJ/OE0iaFFsIFyyEULhX -coxNd8NFXaayaOmdO2LrqN3CfXZ55ofyzmS/OLSwuKOKkrJ2+VI40ux1giA1bbL8 -+tbSsgKSWdSL7KxLIY+tNsIayA0wmeozIaQS6WSI/BkcUkSsaQvjaruWabKLwjVm -6WAJAZ7O7L8BGWkXymNUpum24HGZNVWjClCDx6lI53VZCo2e0nqbVwn9ir9fMRGl -wy3q4PfSIUc1JTb7/zCqxtWPbpDtsW+1AbxT85Hoi9PTB00dW2XL6MIkjtyJQbwo -sY7h7q+VLbBl+MLM6Uq4bWgmKmRt+wIDAQABAoICAF9pe5lFNyk+OMNaMGsIKaAo -s9kyxmlkiSGT1fweesZvo5RGfNVcdsY+b92OyodbAK7/3vJ9yxBQA6qmiTTNeBqs -/L1lg5Qo89n44x1pp43LDjXLcJHjllUJ78euaW+g+1l/pvv7W8lfRI64yez/pDT+ -gtoexefuq1qt36aVLolf6bGrHMpdXWaLhOvprxsCXNwFf03r6h62ak64d/C3dyjg -SCG5jz8DF/FZiUoKkOwCSvUoOFSoGazePBhaV0f5hN/G6SV0NefrD4CSP/HFtmT9 -fOmXm0Bzgi88tvBh9JyxTCtPkUWzEuqjTM52f1QogIt6Rsf6LF2ffkYIHgP+u4e5 -dDFw2RAZg1DFZrZOyIKW+fyDqftdSj5LEN/AKofc0os4qzCXktcl+x8KSxlGnhn5 -i9V3ZwvaV4ivMxWQ7EO1Jftqgy2OBwTiFHkeVQ8eC7676pTx18UmN0L2heybA0Rq -I9mrzYCDYBn0HGYdb0xplPpDuj/g7zLx+Edn6CEoaXGwU1kT7SoNwfVsix8/+k2x -jYee+f2EjbHmBKA7UeXkLcMa1QpjIipMX5gWBYa6ACV+xdDrvoMDjrRx7A2j4FF/ 
-T/tv2GjRWeqWB6bySVA7l92Lltm3RLsW82GBr7I9a8xScUYLa0AfNeeBmS5AIgCA -5FhQ7ivlZ56VWemfiTXpAoIBAQDNp6syo1svZJIBQiZIQRxh1H080wtYDi5Yan2R -loQbjbEitzXC1+rFNtspHuOc0frv5XBvbxOJwXmEsXcS8sWDQBKadr4RDdPGeMDa -SA0If8aWN2BjcILfY9VjJzbL4DY2Tfua9OmIrGvx0LMfPpl+6LXgZzCgPv/o+39i -Kv/ISHEsJITloVeuMq5Py0k6+KXepYWAvdrLkEfxg0gl7Ds2TZDOs58gZRb0lW9+ -u7VOtN9EFd6KZtaU1HfBXUBM57Jlt+aIBOGom8LB/XX7bBi6puIqZOK4VqGR+29j -wj1lnX7npfy8dZoCEplTvp3AY7pTO1BrkNluOcacmnSTGtfjAoIBAQDw59P0M0/z -82MwpGCDilXgqXY+SWk8JvRY5raAthqWrjiAkuA9hjh02PXmRLhExs9owsptuP5q -CckFOyoILZFA4IexlvPgyW2rPZFyrMpI98YYG6Lyw0aMSyDX/rBKJA9cServSjzc -1eLT9uPP43L3EkWLtgocHnD1lZ0YSkAe2ehTGEiNOnaW5H81csK07LPYdEvV8wup -aFNZdcUMJt/V8/C2nkAQpLsgGMLRKYgw3KX7JmtkQsldjK0j9rl7eVH65LdjBXuH -cRMS1qRJgHl5+Yemr7890aGomhD37RMYUKbd0NCQIn2ISxtWF0pWra8+zg0fhPiP -EGPbeNVVI30JAoIBAQC1B/Vhu/7Wtzb3OJ2uWPJ9A6nC0xxXRRRy7Drg35gnERFu -t8vxWlPliqZdij1enFDCwDu7PBH833zy260v1tka8lnt8rzZEkzrlvxcqbQfWSsc -rF6C6lWqA52hjLFlwla2cusauqCgmPbkhIxI4rgHyR3hDbT2Or7W/hxh8+v5CBvn -ebYq1V3zj9V1lENAUATi+t2MOJPTQYyzApeOGmb9JEZmTiOzRolwf+MHsoClaf5n -VsDxIBmgJW/NnSKvD/4wIDQkY/eojoRgc5dZ3QvfsmvAWdJh5pCPir/BFwko2/0M -OUdDNlp9nJWv7Em9Q4yPG9Vs+rMLnnxA+o3HuId1AoIBACSQWzg2TY1ORKDOYiO0 -7GHj9qFvjPxnQTD5G3wfp5t1J/hD0qsj4w/BGllv1rQBpNtWrVjH+j1n7M3RdAi3 -udMqAQ5wReW5TN7vwlKwbSd0C+n+z9We0+dZQ8vkyScHoBk20uSs1N4DzKC1WVBl -Sj671DhnUdOAv05W/fgA1QiZtExgZCqjU/qFBdW77Fd/kbBpvlTjxcJZpkTuvhCh -GdokY0WkcT7Vcd1mRLNwZU5dPwgGhcg65ss/HcxWl0JpYIr/CeKKo3wkKmvyjg7l -5AoiWHdxN0qPtcScVbT7k6leHGWQWwd1ZK46EBUaBdtwEygqKA2/peY4658VEPQS -JdkCggEAGLjOV5XjLMliu8tWrOYE/nBZT00KOd4er1nojcxrRd5q2MDbqMqSi5xV -hibl8Egasdx+Sc8+HLazGPFizNrxd6Ahs3tBdUpYClk4dqnV1X9XK9WV1Un6ULD7 -qzhYQ1gJhQLg/rPxg1BLR/Gl3LahlDdaLvDFTEYPz48oVTszC1yQ8A/Do466TuXf -qF5yPBkt/lxasqMa7ZLCUBMZDF/FGL6x0Z9HDcJc9nv1dLFc5vggoEzts8S8Rmue -WwNcwQsuXEuhpRv+Uf3pimoNm7qfPx9vKS4qxHswHCoC4yIxM2VaALUSCq4KicII -3UTpTl6z0FqLEeB20OY2dIyoPhdNoA== +MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQC2iYA4Ezz6ipwT 
+R68bv41cDKXz2vRDb2h27UovPI0K77tjvp0VfxKDgW/6Jd6UJ+0b3jMo9D1eiKNy +s5pDEEXF9meR9ATTwDNFoo16c38VknUvcmfwSm6cqt0Hxz1/RUwHJ559EoAYClA3 +0sZ0LeLZ99pDb9PavJ5NB9SV848SC8CgIZZR8HiZclQbdWugBv8jXvSvvWrcoWIM +LkB7+89DnWxfDXwVoVjgyz5GeC4+C5LVWpGEouV0ubHU7WF8IvJhA5LnGdQq5WbM +CM0eOJ5ZpCVaGbLDGxqYS88VsP14g59mbTuEDVNXCWFP1myYjGPklIyMqwLC39hg +G1OW1Ipbhvl2bGClI7681VeoGB1p3Fzw1Vr48/ejw0bHOjNfa7K4TE/TdbTIMnQj +6qcSZMHgvPyEbexmYU5cUDS94qBbI5LD5r5tddtNdl2B3fjYrlSGbnGOj3NHRUVo +tFSmVI7y1UW7cVjJUmsE5NEvwurGy9q+eehsMSU8JJvhCLiupxgyoOypsMixZUsq +rV5L1Dp5wJyDsxB91XY1MlJ4lG3xU+Dg7Oky8yJBUkiQze1uJJT+qdTrdWVJAdN9 +BWKRbda9/QH1iz1i48MyrzFnp1mk0qWSETt4fLF24eoLe81WSjTK3/ABSbsjp32G +kKVMK8b1r7iXVmRE0u5RYKEaycRg7wIDAQABAoICACgRktW8U1xj5NLOn3+l0q/s +DtmyrH/JCtNgTzKDRiqqaSYCB5VaaYP4e84bVfqLsR627eAFjRsdP1PEXQ5vmgFU +j3OYbx7UR+z3O7svcywXFCYwJOS4UgON9iro73Tqjz/a0I1/7CJa0TUPzYRfNjbG +k2DOQWD4mn8qQt4Pss4xSj1cYhTmhnKYiCHm6pMcNhFbnLafC8AWpOErnfgZVGvx +OIK9AQn2ev4NX0Q0yWHRRJAU63CEGX4/7OtimE2Zlj75e9vC7bHk3WXYYL5Li2b+ +Azz9+yGc53+a1IBcc6dqrSjcvX3FNxAZ/QR7eycZWiwo95lBSL/iRysBlJ29VglW +YScc2Il/xWNp97PORwsJEDpeWq5UYdinARFK6PAGjnxmADZNAeZHP+r1C5CQaw72 +y31aIrhL2s9wRPZ2DytIlkSmvffIoNpZJW2AyVdJn8L37Aot0Hwr0SsU8/zibvZ7 +4d+7/6rnPnE1jTZlpnDgyH5e5Mtn3YUYDlPAEQudfYyvh0QrNfSOMnetWSYTh/Oi +40iQM2vaKDiK91deTR50g90A88eSgxWMGG6WUzqNoE5CwwiNQxHPhrmFi4H1V6y2 +uaF3s0Gx6aF6j+ws1ImbgrkpAbvgTCENoDtmS8MYbZjXgTzwnG4UtIwqdc5bK2B5 +i9mdb5w1v2v6XLUxVvKhAoIBAQDhVgmw/ssCscde91dWLMylm5Wf+Q7Ry32eYSr0 +UXqYUbChHkYNK5HpVY5b6Br7C13FrhboDk0hsz3agOFexcrua1N2huyJ8gGjlAzz +i+286WuwbhsX9rYgqTvAZmQYpklAfWLZH8nlwtt3iafNhgSVaa//A2m4hhZagElT +pctVakSyG3OYaNDTXBDOnZi9xagc3eWmxkS8PWFaYC0DJCw9yf+9ynH6+FRZg75x +t7nnDd/eSxtW9QUALUCheOO+yIp/uJUiIyWR69cfojQ2vNx5t8FzpK6EqHFCujhq +e+kJB81BAc2P59O8oGqw9fvc9pzCQXyFbx7dtl/Xu/JyMEqnAoIBAQDPYH0afED6 +qyvtQ1le6bjLW3jGCaymflgGwc0sm/pm/3XY4WXhrmqeSIF3tbhP2HnANGinW0wP +nFd0vAi8RU9UxB7iIUAZ6wXRS8/YQmv5zIouPzSCpmvW0l0IcnqjDUS0IZPRo+UA +FTzS2KIQ/yOsHSZoVNQe/Tsdk7Z8XVAJlq+YZ7o7pGR25yGPleUUbVwbIhoEiBPq 
+EFA+4BErok4CFQB9J8jLRdzmTEQFjQ/w4w066ZkplcIy009a4+LXIvL+MCPG3qMD ++2K/HlTYfMd+CyozaF0ZGTECtakrK+PWbbTj+VV30SD9Sckk8ZqIFUq18Fb574gF +K2KSq5SkYSh5AoIBAQDdenJ2HEkvcctzJQsbsVbII58yKFsPi8IBjKHql7c2xXwl +MJtL0JpOzH/rB7yVKXvWk6ECHyRizkkqXeil/STTqHvVkRInF83SmO8N5mgaeRcW +x3Ir4JrsiUoodrtFmxN+pn8kx+DqytZprMxY7rPMo5+PuCwOaQTJmTP5Woj7gELb +CK5ajBNM2z3Nxwrc48yz6soRXOksV+w7JzK21rQBW2zZf4T+V1yYyyvBnALF/lYe +qJXLp3Jt1QykaSz4VSYEGUnDzuXbggHknspRTtopbJpg7ul1jBYeruhKiVXoQVnV +3k7MdeEgkk+rdWtDqMU1Daa1hB3Db8DOS3YmFB8bAoIBAQDPDD476F0UKTzlWf3r +9pzLZNuTlmsrnC+VJ4ALjvwWQ+7MiFapWfQXbrrc47FO/wqoLWtj1JJ/b5Ad+/MY +znajYmCXU61lczLOwcuV1tNph59bBz4NR82ZoVTDr1DkZMX4tyGYCPQF/i5JMYO2 +Rpa+LCiBuFhFTH3uTOHBD4Vu3WUaXE4jaEHqOWBXtMgQehOg/45MgfSoGHuWGy7p +itYp3AAt9T/UPD+OLA0qIaoNzxQRgtOqIlzPVA0B6U89jyZfRX8i+nx16FKyEL2T +nBmtrcYHp6Zz/aPiWa+6a8rB96zIhNOhmko+uaG7YgHw5pk+R+T/C/mZd7SmTetN +p7e5AoIBAQDXqOVl33+eRw3CxNCJZsfglrD/Jz8VuZv5MZwRolEB4IQwm/whXdnT +34y0UUpUQHnVepzAP3QjsLKANPUY2rKO+f8NAX4Uakzn43QLI+hZcxx6hVkV6OkJ +Hi9fwSEBZzx5DWEbxmYMIGlaRL1yVff8wevQci7WA4rrztb9D5B0c49ItCrMkLNs +X6+9Bh4zafL/FxJSkTahQLe+KGNXSGGGrYB9M31oLSKKM955ZTRnICPxuyA2hffx +8lmHZ/5hmP+eMKoAJ9khilX4LmnkdXJEZ2w5lQTPUTNP8ggaXvFWpijjUsaXEdkR +NMnXQHpKE2RaT22UJ6z3W+biQqNlhlVW -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client1-req.pem b/tests/integration/test_ssl_cert_authentication/certs/client1-req.pem index d60adf1f62d..d5cd522bc8f 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client1-req.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client1-req.pem @@ -1,27 +1,27 @@ -----BEGIN CERTIFICATE REQUEST----- MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp -ZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAMGHbRBS+W5wgB0N -v66ODxB8lU1xCj/4imLZPZcLzPdtL0fR1TS+G8KjGSrxznEph4NbnFR7cE6fKtuC -/OSSUMcBeRiXZxjW1+uYjGp6HGJYuyaeVKMrhP2W7BD9GuMu1l0k6+6KR8gGCW+0 
-9ROH8Uy6rvCI+aGl6pgwabKzYWIu04EgIsdCgj9aRpkyxoTwFdriDJngbTutXrPC -C+LaozYJBD4tnwBzSfpMlP+1rEPc/Mpt4beDyLA9vp15uVYQNaJKopvuCoflqLE0 -4QhcMdubsUjQSikutz5x/chElyWG8w/5kjAKZmdwJLa/yxg3NsCoPdmSj4aVwQwK -0seY5o6yUpdADc0W2BzQ8veuQoSX/rWsf/lWQe2VG7aEn84TSJoUWwgXLIRQuFdy -jE13w0VdprJo6Z07Yuuo3cJ9dnnmh/LOZL84tLC4o4qSsnb5UjjS7HWCIDVtsvz6 -1tKyApJZ1IvsrEshj602whrIDTCZ6jMhpBLpZIj8GRxSRKxpC+Nqu5ZpsovCNWbp -YAkBns7svwEZaRfKY1Sm6bbgcZk1VaMKUIPHqUjndVkKjZ7SeptXCf2Kv18xEaXD -Lerg99IhRzUlNvv/MKrG1Y9ukO2xb7UBvFPzkeiL09MHTR1bZcvowiSO3IlBvCix -juHur5UtsGX4wszpSrhtaCYqZG37AgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA -aCqXR9C9dZPY8ohqtRTPYcmYTURPAMIRnDvfjtUc9896GIJYR696gh3s7oxXo4c5 -85acI5E8LY0zrtq1yOHRSlsmoIp+Tm36a+K1C1H8IZGvwLhUGXogj7eWzGOTTFx2 -OK9m5QiaL3w4p0P2magOlDxA/PCJ1Uqnz5eWbdHT3TBFIgwpRosK7j+/n5eLfgP8 -gaAUhEW1cUxIcFy/nB1TUhrsj60IaDgfgDsKq98c3sDIf6pdzrRuk6m2ur3eVoHp -1gcDn/XhVcF57cvhi0kdBNA65pKfgvHVhFx2YUdb1nlPjkwDrkWTF/HyRxMxs7a6 -g97PvBHvTc8wojnZpRbXdQyaoNjAhJzpcXaJ8qPU8+P8FnFFEsX94nh+u0FTqtZF -DRm8opUwYGrPCznb/u70wlMMgeGFD8BSQ83TfwlEug6J85Kfh0Vp8Z9gD/GNN4sp -RLFChDgU58TmaG+gFAufhUJjDoSwZ2LepwhI585pdePvUNOL+q4hl6dL9pfGKVxu -gwdvM345CJGwbIOhnol6kfakjp3mSqejXGIjnxdzbTKJkGqhwLcL3A06Y37xykRJ -nkHN4ahhLnFEc/k9O1SwcvTTR1Ct06bYGRNbVrjy1RWCsjyCWokSArOdslh3K8K3 -rva3aKss6TWYg2Qjce10pMaluRbIoEkx+0iII9vujoc= +ZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALaJgDgTPPqKnBNH +rxu/jVwMpfPa9ENvaHbtSi88jQrvu2O+nRV/EoOBb/ol3pQn7RveMyj0PV6Io3Kz +mkMQRcX2Z5H0BNPAM0WijXpzfxWSdS9yZ/BKbpyq3QfHPX9FTAcnnn0SgBgKUDfS +xnQt4tn32kNv09q8nk0H1JXzjxILwKAhllHweJlyVBt1a6AG/yNe9K+9atyhYgwu +QHv7z0OdbF8NfBWhWODLPkZ4Lj4LktVakYSi5XS5sdTtYXwi8mEDkucZ1CrlZswI +zR44nlmkJVoZssMbGphLzxWw/XiDn2ZtO4QNU1cJYU/WbJiMY+SUjIyrAsLf2GAb +U5bUiluG+XZsYKUjvrzVV6gYHWncXPDVWvjz96PDRsc6M19rsrhMT9N1tMgydCPq +pxJkweC8/IRt7GZhTlxQNL3ioFsjksPmvm112012XYHd+NiuVIZucY6Pc0dFRWi0 +VKZUjvLVRbtxWMlSawTk0S/C6sbL2r556GwxJTwkm+EIuK6nGDKg7KmwyLFlSyqt +XkvUOnnAnIOzEH3VdjUyUniUbfFT4ODs6TLzIkFSSJDN7W4klP6p1Ot1ZUkB030F 
+YpFt1r39AfWLPWLjwzKvMWenWaTSpZIRO3h8sXbh6gt7zVZKNMrf8AFJuyOnfYaQ +pUwrxvWvuJdWZETS7lFgoRrJxGDvAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +L2KVSFlGtuYjmV6sTYF0GlA4V0RvUTbLM7qnd321957ReDR8iRqj9ZnqKDEHeH9g +jfuW+TV/BeQFjTTAXM5Gy+LRz23xOwqcvxKh0WYjGLhDXIjWx1zzzGBaAcyWMllq ++o6LAUadST3yhfncP0A79hPLxOsPgMGd3OXGajKskDNmU3MTTsbtQ065HsBwo07P +leMx2jDkapesUaNaTmXERg6MT86hsknGsbO/tU3dGNy4hBuOX5O6bEkijF6eESLd +U4Xc54yScVvxpWqTSEAz9xHjIOpOZNfW+enbLdpApxq6IZkeVM8z7yy8DNjTJ2SD +aS/xKexqrWjWFxNa/CtKezkaZgmLs9jGGan+hmlNBeuixvJEekPliv6Syj3wvLp/ +L3/PmLgBzZj6iRdw5fky0swCn1qwpgwYRjBSN+SL0E8yG6BGKFWByQfwWbdOu9DS +lN/CPBe73yi8kYY5gBvBmPsrt3VMVRbXBLNM16jO6lkyYzyC48jTdicqpLsHazZn +Z4I6GZoUQKc9WPzSdu6tEXjM6e/2lkT8kaPmrae3JOKnP+lzjZjfplV1NylICNQY +whPWBVGaRg0dy8dZSTGtzygTNMoHS3zYsBGE4MuGZtm/4+x/XLkz32n1k58wAKxJ +JKafNaOReYFxJKd+ML5XnYOVICuw3nxQY+CeVZlz1Bc= -----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client2-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/client2-cert.pem index 996bcf41d94..9d317f07963 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client2-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client2-cert.pem @@ -1,30 +1,30 @@ -----BEGIN CERTIFICATE----- -MIIFMDCCAxgCFGtzLs/dg4kQgBYqaATBtAwv8dB3MA0GCSqGSIb3DQEBCwUAMFIx +MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRuMA0GCSqGSIb3DQEBCwUAMFIx CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl -cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTIyMDgwODE3MDU0 -OVoXDTMyMDgwNTE3MDU0OVowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTI0MDYyNjEwMjUw +NFoXDTM0MDYyNDEwMjUwNFowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE -AwwHY2xpZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAPpDQjGP -dPUiGXybmTEEbneSxXDaz/+2H4Dl9QLq3NnqozavqqZksseYsvOwLbzdZ92sRLHf -5B1Wwo2h3SbOGtb6CD3NMqV5P/nvHghn3KD60l5Jy81W8aJ+pwra8tVf/O0oDcJ2 
-qwHABhMYm7cf7vk3Llt4clQ2g9wj6o4UuCFjGXDqPLxO+xN2Qtx9YZdxUGTrUtww -FgaoJiUqoeuagMtJ2OlmTsRM8VNnddLqEnqTWNtV3hloH709fA4RzudCOhHDwx2p -+zKDwDTnOFgOVaX0I76CZ/eZ3qU4cwIZ5bJaJjowi0XCP0Pk3LYQ+wPrDsIuP1td -xVAlU0rXXePxi0yrwDLzTi8PPogwkMSkfSRf4xlgQRlnQjdya4A6h7VTvh92tpMD -i6EP1JA1nTNebOf5AjKwNHQ6B5XuTRP0PEUIDAfV9mNriR5vnn+oM44AM7FQ+tFw -Jbc8CQX6487M9KGsHmVsf60fWCBmgZicof0XSpVrnMDJdGARzgmxz4z/Eunrr9uu -p0SttZdWns1lLwWpbnrCk7I3F4SZ8On3Yf+RxFLZvBJjvHKfRopi81YkralkKfu+ -Se6TE0QpkPEEaXW5zqvVkt1gW9j79zeBZRFzjuT35F5m7fWi6e7V/W2crtw4lGxH -/LYX397ZP7i7cT6N+g4JYkeLgMy18S1jiZtHAgMBAAEwDQYJKoZIhvcNAQELBQAD -ggIBAJjXO5KD3JrWkcfdvYgdXVRLQDVecwnvUDAP1R2Cw7+iZRfmWccESKTly1HM -+71ThCt7wSFqSxkE/nl6/4cKgNGHG1Zw0Iy4RCMmA5vxiNzgSmUg/3jyl+smQZzf -8e1iDAezlsEJrohCcUTKocv5fl9qWKspZ7Kc6XKQ1q7YbUyh1ZZpzh3mHZ6XhSo1 -EGMXrlcKUst/hkKGiuONOP3qRjsb+lMRZ1IpIB1uIT/NddnQw08Ah25nVSRtc0z5 -b5edvvzspLZ25brsHBzKtHHg46FwMTi+UfMgQoEsV3DXNB/sWT5V+60AYrPA2mss -5MNhGSQlteinYD8f0LxO2Ocoxl5vArzlgDjx+BY1H2etI0xv3u5U09FXqvnbNQj2 -5kbjJXI+wBXxi+CSy029fBBaU4OpjT3TM8VTFFBL1MEe38ZkAX47HcWOenN+xHsq -dFDZ1so5ZYRbPgPhytEE0CWL8fNatjtPCTQTOrZVZW5uKJfUJog6gYbe/YxSKfqx -QtDf10xpSB8L9ooCyyBIx55YctpEtCuj48HXe0vMBBQvFTx+C9XikqTqGjc2kIdU -GNK2uImXLUas09UTXzm4rGTgf+hM2ixHd8/7K0TATh5eMlK0td/unXf+/yWmBEcz -FaZyYeygPcv4U1NXAFdcY//5qZuIF38H4HbQ2QISH5G6LPff +AwwHY2xpZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANUgBLgZ +EjiW3LmjGlCu17h2ERYc75YX+7esbw/iMWMrcrXfvNNutJ4H7hOsV81ZpMEQouHR +fog8InraKCuwmb28DLhJZJlpE49hZxeUJws4yN8VCCTgr/tLtTsvsSS+TGA58B29 +HHaglJwY0w2mOlOVcUxOkjne4VHokLOomhzzqcqTLCjwUslZqRn+SDgXyRw9P5re +J/m6E36dFyBeVglZvealVp4uK/TTqVVFJYBljD22M9wrOeo9AIvrru5VlgDNHu+r +wqgSeE/bGcwhX2J9J++lwOxsFDR7OILY33yoD7/6NN61SJEoRh7xZTQkr/Dc3Hpv +jYyj4YagdWKq2xDZzIxH+QfEMQssuFvulq5L76TJpXB3ceyCu2NEvb/563p1EvXQ +bp5Zjz7Ojo1elobs9epvfbCmiANmTxG8GLKteLXfjcph492gdIw0nsV9/bIzE5C7 +lnff4nEU9E/uEJz0FTw61VjcZPtpqEzLE/8abBU48pTj5HqKo8Nsjx9hPl6trO4h +81yMaqwbQDmza1KsU+CPIFiycyv8Hn4w6JEjwnUc08rOoQ3e7HjqLNpn8X6RirVQ 
+UrwSU7L8RTKeOCOBLg6AMXfH/frPRnNQjUG/Z7tBTjTJhm38qucxyHI3J5jwX6vn +/jBfdFHfT6510V0Q9fzgzp3H3fyHpnLW1qxDAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggIBAF9fs1tF/yL+eBTf7F/RMdDrb1q9/YZCZ6btJH6CnxCuj4M3o4EkTW2PPSY5 +AeTX0zYaNXvlHT54vjdO+9H3ocyY0HfjVSzttw7qiTkvsssRLLW0PMpc8QMRBpz4 +CmD8vfjY63hKzE2cF5GyP1RveCuFVf7//wM2dfPwrQkIOtKrctejYjn1tOAfgJtX +It+RWvJ8T9t4e3KxYgKSa6eyYxyNMZV67X91C3jIJLgTTLwXXGQF5G8hH3KsclSl +RDE3CAYoyDTtaMlI6A3qDtmvfFKzeltKZc8w7uIbjgHvF49p+n4oh1WwDc/C8SUy +1QAx6DSSW1f470Egtfp0hJKT9yJh7C+/EdeAq8Oh1vMxYKBrtjswCsrFQ+bayEcl +2SzMLez2S/bIFSF0WaDqqIOZDzcjpXjbFlm/px01qoPDk5lkTPGA18Zq8mVc0y2N +R3vYzvfpigjkjXgMcOIfP1Jnlrx1x/4+txR723hUkHQd38nKENepsoEoLrcpmbIl +VAKYTALTle6jJKGf6oZf1TIs09Bc1Qs8Oo4IymubOXD+FlUSmggVwMiST15O5vQu +zdvidRHhAE581DKK04GLmWn0UE0Ko4uaNHAgl2gzZsuJQ5oZynOxmh/z6t+mgA7L +l2qS1WOq29Cq2qWrrfvqbl21LWLrf2X75UyTd3GAlQ19aqLV -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client2-key.pem b/tests/integration/test_ssl_cert_authentication/certs/client2-key.pem index 76f56dd68f0..ed0d179712c 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client2-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client2-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQD6Q0Ixj3T1Ihl8 -m5kxBG53ksVw2s//th+A5fUC6tzZ6qM2r6qmZLLHmLLzsC283WfdrESx3+QdVsKN -od0mzhrW+gg9zTKleT/57x4IZ9yg+tJeScvNVvGifqcK2vLVX/ztKA3CdqsBwAYT -GJu3H+75Ny5beHJUNoPcI+qOFLghYxlw6jy8TvsTdkLcfWGXcVBk61LcMBYGqCYl -KqHrmoDLSdjpZk7ETPFTZ3XS6hJ6k1jbVd4ZaB+9PXwOEc7nQjoRw8Mdqfsyg8A0 -5zhYDlWl9CO+gmf3md6lOHMCGeWyWiY6MItFwj9D5Ny2EPsD6w7CLj9bXcVQJVNK -113j8YtMq8Ay804vDz6IMJDEpH0kX+MZYEEZZ0I3cmuAOoe1U74fdraTA4uhD9SQ -NZ0zXmzn+QIysDR0OgeV7k0T9DxFCAwH1fZja4keb55/qDOOADOxUPrRcCW3PAkF -+uPOzPShrB5lbH+tH1ggZoGYnKH9F0qVa5zAyXRgEc4Jsc+M/xLp66/brqdErbWX -Vp7NZS8FqW56wpOyNxeEmfDp92H/kcRS2bwSY7xyn0aKYvNWJK2pZCn7vknukxNE -KZDxBGl1uc6r1ZLdYFvY+/c3gWURc47k9+ReZu31ounu1f1tnK7cOJRsR/y2F9/e 
-2T+4u3E+jfoOCWJHi4DMtfEtY4mbRwIDAQABAoICAEnNkfTJqKUt9DQyMuAovWcX -6pAYh2SS0gGST0oX9x9wucdD0OCXK73/Ay8oUqSy6pGN3whRzZT1ZBSemnh6KaIi -RFHtdLUl578OTF4QOxliPq29t5OLw2C3Vw21eg2A7DcIK96gXlf6yA/TnwBHM5Nd -OZeSEq1RElvWX7Kc25xE/Fi3S0gBRrv7kUYy15fwu9O3Lk1vBN/bsLwfMXcorjjF -Q2m5WRs80aJWDYIws1hgocC1NvVpJosJWc9QYyMB/dwFTkIuQb0o64LueUhGme2B -7RSeooh7YRR6bDg/aOTK/yED9d6RpVShZpFuuofyRvzu5JJOOgUjrMeSccbqEbjq -VNjkdxoNl917TBezIR/KioXS/gauL4VF9coZostGohDnLIdJrVXwisAZFU9676pn -hFGaZ+vVl6oiWAFwwmbkSo423cIbc+dxMfHtYElbEfGm4LOLrybS3K3sg0gn8HZq -p/AISM2l/282peWjHfa1SXedvvDF1WFJRaB0GuzodkhsqEXXSyAQlc4veNiacNt4 -UQCYiOatcKzvBofgICyX/lTQN/TYdfo57tjCZxnOrDHwpcHH6xRcpuRnDphTV3KD -g2JgUKbvBcZgY03nS7JonzhuOMNd+AUT9Oof1VSS9KKRYLu/eJxl8/6dR7Mcmv5W -fCzEqNVvaigKkeVQEq9hAoIBAQD/73EFWGS+DfKuivyxGAdxAf37eyFRIQYL5Xz5 -xB1h+GjtfdxrigNM0hLHQ+B0209ZwZzl/O37OvW8x8V/Sj/YrY/BYQIGu5quD6lL -NbuEnhLMlU/yANukumNMDbDKo4Fgx+QY+y23zEIFNzraoKH61JH2VrWQ9qfrupaW -qVdUE/LtdZ6/Ar2pKnysLYsUpgZX9mashKg6fNfJWyLqfj3frVD7RM5NTNgwD5as -vnSAkvMLJV1997+57W68E3ALnGxf8UbTxAX8iQ6yig6Oo3bA5er6qt5wbH5m12ml -0UnPvhGNO1eRnX85rkH4okGJCG6ln+wPXbkc2igVH4Tlg/HxAoIBAQD6U3M4JkNE -kVRqke8di5D/HUpsIG2t21DTgihLrg6iNsjdaznvgzRGRAIQCLP14qRoibMsLN8k -zfr+I4QlE2dyIn++fJkaOq/xRvODSDNRRg5M1ttprvAK0QCvbgKgkYxxqpRzOuD9 -Aowpq5YGhHbe8g+aAoYGU9kYbx0XhzU2ZDCqbQP41e3f5OnFrNm8YCzazsJ1fNEU -H7ktT+vCP9g2DtZWF0zFdDeHo4f+h7DcLGaEkvAPpRWQ0qL5tZ7vPZL2GXO9gz6o -aeD5fiMZVUV/SOr51MWIcSIXa+oftxhkZxGqjJrm7Q0cJu3ic7yFXa+9nVaQd8r5 -oWRIvFxRCii3AoIBAQCgdf3VTRs0cUhDGI7fdWJM/uO/RO6Zj+vbj5Ewgz2sy/L1 -W2DT5560Dk2Js0vJ2CpC6vm01ERcrBWfu1xety2aaw7jPi/oCr0q/lS0+8ZKmlTN -AZwy7UjIWbeh2j+KfMLD6+9dnjWGs8B8xqjWo7mKqrWszaZecdbMG5sIcLl41F0N -dh4GQfCpXCL4TEGTu5fklG7BIRbcPKOJ8eLNREMEtwQ71WLG4jP0xgFA4tPmPLu2 -tEGOlcMWDf/MWR95mCP2by5p3M+oCkI1vArucRwmSsBtUq3NrKf/UHVDahkt70vP -0XHQsP8wKcng2dHe2XON0dtNswQ5S6mvvKg0wenxAoIBADmuEuWEQ4nAJwBrYfqf -1yhBmo1xp3QhDe2+lwRhNfQAxaXneDENPXVXZFZHexgUQifoWsW6DSzQ3Z3Dl0Zn -wzVUZ7T/xtN5ZGMnIyND9rcqek8QTvx6F8uWPx8tLSPMhd0HHi+zCHakKKHNbMNR 
-sIZMpnWpUTKSDXsI1149hHBlA6WxevHScX5eo9Mtsfoq+wrsC3jA6vhFGwkq2jsn -NO2324F42vLAha8Walam84S4ImM37GLeeiqlrnH1fIrJ2FwmnzmyzTRHOVSj0Pic -Ymgi6Cnq8h0vXFijQA5QxSkVaseuoF7HjnbHLZc5bd4ZKU56u6CMSdPdcVslM+xk -d98CggEAYleUQS65Nk2EWj6nLXD8HuVDWbKLXgP2Q4VpORDHpT4isboSTtR6mMwR -huncQX5lvUZqZNRvKIS/IfW/2CiAF+3IhUxx8r+AUyrc7OClp1keH26c52/RYoJM -sl4DfQE/DooWvieQTx4rILiEYrPWwQLYfuDs+dSgnaugr49s431PeVyxlH/849aI -dHnwInBd7sdbN11e6xErry4LU95imkRhg+y+QdiOE2N1Q6I3G2Jn+n4NhIWJfjlS -Dysosye3R96FsFQohVMosVLTL9mCT8J6LFIK8CjRcvhSQmaTmXjTuisIUntBA9zl -PK2zQzz0JkpsDD7mGM+S6apZHiLgHg== +MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQDVIAS4GRI4lty5 +oxpQrte4dhEWHO+WF/u3rG8P4jFjK3K137zTbrSeB+4TrFfNWaTBEKLh0X6IPCJ6 +2igrsJm9vAy4SWSZaROPYWcXlCcLOMjfFQgk4K/7S7U7L7EkvkxgOfAdvRx2oJSc +GNMNpjpTlXFMTpI53uFR6JCzqJoc86nKkywo8FLJWakZ/kg4F8kcPT+a3if5uhN+ +nRcgXlYJWb3mpVaeLiv006lVRSWAZYw9tjPcKznqPQCL667uVZYAzR7vq8KoEnhP +2xnMIV9ifSfvpcDsbBQ0eziC2N98qA+/+jTetUiRKEYe8WU0JK/w3Nx6b42Mo+GG +oHViqtsQ2cyMR/kHxDELLLhb7pauS++kyaVwd3HsgrtjRL2/+et6dRL10G6eWY8+ +zo6NXpaG7PXqb32wpogDZk8RvBiyrXi1343KYePdoHSMNJ7Fff2yMxOQu5Z33+Jx +FPRP7hCc9BU8OtVY3GT7aahMyxP/GmwVOPKU4+R6iqPDbI8fYT5erazuIfNcjGqs +G0A5s2tSrFPgjyBYsnMr/B5+MOiRI8J1HNPKzqEN3ux46izaZ/F+kYq1UFK8ElOy +/EUynjgjgS4OgDF3x/36z0ZzUI1Bv2e7QU40yYZt/KrnMchyNyeY8F+r5/4wX3RR +30+uddFdEPX84M6dx938h6Zy1tasQwIDAQABAoICAQDIuNYY+OvTRipt37IaCQF8 +Zh4jgG8ZIk9dJlaXVAYFi1cG+chiLSKIr5lHCArNiT8E4gE1wtNzxYcHw00QEMxL +CL/GFMFdRrw4TpkEePDovbtZdvprmP3FJAF00673lw5hlk+SApi7FPPBrBOiCEto +ixfgsSNAw6vcM7eMrR8wY0AnXMK7b9PYdMwxge5MfgJXyUuNNOvbY6eWmKa+Qnqv +ZcjXYCKa6YtWkr4pY+005u7U9DQViNSLypYoMXlYWFzlNkqLmW3EU1jihMzgFxI5 +tPwW1TpEsGm7H84SVeTuB26F9UUz9vJ4W8DmxZz2JhNaOvifi056BaKS466KlbWo +iZgt57ajj0VmYxB0ZL7QgQqb2xDZL12kU1AU09QAXJnpy/RqvV2HloKbqrOd5h4L +oME6j8vT6Q8o1vsh2zJuLXHAsMr30XK8x1HhLDDzln49gq/d3GrZkNrPDjcumiwI +o6PYR91Q4QI11kdqR/3005wV50g847uURFNF6J4ziDeDGsHqj2phmOIt6d8vWcFo +XBEovCZkXQUSx+4NgAAy1GRBjK6tLRRQnS9bGgkELS8+Jx84NlgKkH3m6+lKNJQ1 
+o5SpUqmk1dYnpTv99U2+5qvA/o9SVy56wlfuo+u0GlbMjs3OmItXErg46UBPhd4d +HipFZrBItpw0DYAF+voLQQKCAQEA9ePjYyy53VGLq2+vRx02IZOMQOLaaBDfybtP +51ksHfCPg+XZ4jWsDH4EPn5DeUmjZ2nG8GYSuv8dAQ4v9kArToLDyy/qhXHzaING +uSd6KlTGrVrPK1Dyu2p69xYrnduD6Vm06sJ4olDq792rEj4/hdzVwrtgw+d1ZLXG +3ropWgrHQT8z7+B9CAIAOXhYlKrV7+UdbAod+j8OpCIHk5X3+NkT4Ht7biqzQvbo +pJJILFA1qHi230N9YR8ng3PHQYObYJ6NFBrxhpXIfXwbuPyrEApY3zaL3HbkYC52 +aAI3zy7WOqZSqRZ6aDzXdf2EMGusNSxj9/TAZhTAiJvwHdwBowKCAQEA3eNC/iMt +kmy4R3FQgti0Zq+CBUErMn46pQhBCcQreI/a5U4UT/iY5WGutKXp45d/BM2ztyQL +T/8p+85RkasVF/rJB2PwlzUZKAAq29nGXuV0I6N6EiMYa2LfFLzrrleNamPQ9Ubn +atp0kiyLiPZ6P0+Y5wZMirHlMyup+fzG6xsS7KVy0Z5Fy4YetP63r6xCVQ+Rdu3l +dvXqGb2Bdc9g4OxES1Zj7MKHg0b3ce2lYaL0cq0z3kJ52MAVbL9tQQOstJX41VYv +/QSVIjC5VACSa2qsqzquqxYOyT1U0l/8innHfD/uY/8907/q/JqoO1hU5LtvZ7OO +ZF/e/ycZCM2U4QKCAQAXUIJQ9v6wk3jQyoguEAD/8gOMa3YWA/OUJySOZRAfzp1s +/jBImJo1nQU9/67aIzdRKOBqDuObw3C2lufJS5BPo2p5K5PrD0DrGfdsuueEeAFW +kpOuIcDCMHh0US/ViejaCV10HPhfO5jrIXOFCU3wnV3PVwD30kx5PhsbJz+ggAEg +mKOODRUN21K2IEkV35TlaC3//n2VKsFyop9hSQj4GW0fDdZIPdg9czff0tbxDLHp +xXhhdv6+ZLvUZPfxqE7lPGNYEq3v+ufFrizav2pg3PpMP9nHD6bbz8v+VKeCB4jc +isSvr6fvlkU/tMgB51OuvwTDj/tmMnWG/nIoAqJNAoIBAQDWiLYsS8zzJwUhplDm +winiosz+0Zy3jE6dZBamH7K8NbK6RLzk+YKzPbgSV9yFPeQEu/KIH2SEqzxnh3tc +cWLKtaKK77keKaux/j9yI+RlukqJbrVHNgGVSppQTb096s8DT5Eopa54pNFSx5j+ +Cvn1nrtCm9eDvi7SQ+RrnVii1qF8hxc1z2bCOmIUM7dcNhxIa+4EZE2ZsHjw/EZg +puqPbkE16khhEVC+v+3djJ17gngBLK/atMFkrYvJgmhbFPd1/w8BDf0GENk0npGB +w6/OBez+/ZUGPCR9tDv/z+i35rjWzGVs78tSodvM8qe4AVbLdOJpDLWfHQbaAm51 +EXhhAoIBAQDmZVXAS4dTDAp/cGwPoXOyFxu+UNnGnAKO0S3aJW9qV6E5N7jsLqzI +4eD2Mk6chkBrO4Upmwgx4sLVnDMlHQGvoqpWUZES9k6oIgNZ6N7KXnvFm5GI7mlR +ySA2LftCeSb4BzQmwyX5wcVjOzfB6bkSgEkvuMFrRStSL6+79XZCoh54jBm+fW6g +up6oXa0+lJbyO4Qrx+oWoe2G9nrUJzsjV1Gj1njnxDECCMrmB+X5P4D02Ac2FgxP +rN+bxs0TpvO4fXsvBN4B+/dtF2Hjgo3rQm5FQ/NmpoO5lAs1VZPjVUiFCjhm3Fyk +Xe2nzT23gDTuPny4yivLAMHPZPfGLLg4 -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client2-req.pem 
b/tests/integration/test_ssl_cert_authentication/certs/client2-req.pem index 89cde777a45..f36eb94205b 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client2-req.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client2-req.pem @@ -1,27 +1,27 @@ -----BEGIN CERTIFICATE REQUEST----- MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp -ZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAPpDQjGPdPUiGXyb -mTEEbneSxXDaz/+2H4Dl9QLq3NnqozavqqZksseYsvOwLbzdZ92sRLHf5B1Wwo2h -3SbOGtb6CD3NMqV5P/nvHghn3KD60l5Jy81W8aJ+pwra8tVf/O0oDcJ2qwHABhMY -m7cf7vk3Llt4clQ2g9wj6o4UuCFjGXDqPLxO+xN2Qtx9YZdxUGTrUtwwFgaoJiUq -oeuagMtJ2OlmTsRM8VNnddLqEnqTWNtV3hloH709fA4RzudCOhHDwx2p+zKDwDTn -OFgOVaX0I76CZ/eZ3qU4cwIZ5bJaJjowi0XCP0Pk3LYQ+wPrDsIuP1tdxVAlU0rX -XePxi0yrwDLzTi8PPogwkMSkfSRf4xlgQRlnQjdya4A6h7VTvh92tpMDi6EP1JA1 -nTNebOf5AjKwNHQ6B5XuTRP0PEUIDAfV9mNriR5vnn+oM44AM7FQ+tFwJbc8CQX6 -487M9KGsHmVsf60fWCBmgZicof0XSpVrnMDJdGARzgmxz4z/Eunrr9uup0SttZdW -ns1lLwWpbnrCk7I3F4SZ8On3Yf+RxFLZvBJjvHKfRopi81YkralkKfu+Se6TE0Qp -kPEEaXW5zqvVkt1gW9j79zeBZRFzjuT35F5m7fWi6e7V/W2crtw4lGxH/LYX397Z -P7i7cT6N+g4JYkeLgMy18S1jiZtHAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA -h9/gIGo83p/F0NDDWGr4P9LxdrE+kkaZF2BxRv2rkty/OC1Qe2lcJPWaEOQY07an -witmxGPaZB6e764J9z/C2l8hoh9LFAIZ+gPBzMEqPBO2/5WYWjjPzzUlkTBczIdo -mCZywvYMrdPQ/F2LnZd8iQO0pmFUGC92vV9Hb/i3y7RthnJWRYv+2F/D5ZWE07sz -xA0gwmnY8d8/G+O2CwgGsHLl2tByfnePDqq+ogwRKXtsPeJwWloFvnPRHE0OmkHf -n2XHbbLMS/m8Wi5utN/LpMV+WITQHMxGPGXcX6XOWVFQuesfI7DKoqZzk/aB2IFO -tu2deFdfnrj3Md8PRAQBe0Ufig0gl6EvutdmiNCeiDloPwV4gLvH7SLQmLheOPP2 -CQZh1skRxhPmnJYD4rrsMGv86dlhEvtnxCO+cUrxnUAAr81BAX/Fo1Img0rPaanD -N4/FG7LnU/Rk8g9roN/8v8s62CnyxcQ4UAvSWAaRrudpErDk+L24Ib4UCtcYiGSB -Dj2tK5SMfcXn+bR2HTdVIKHWHIK1X6bS7Jn9ZXlDg/MCyCILeOmW523FoLfTvNVH -IY9MgNe5KDX7dpPCAJFWwmidekNz+sSbpu6Br8IgWd6SuTEx8Lmb9GB0V7P2CHlE -1ASW5YJMgVPJLQ9LQhRIim2+pL3Pz/SM+oijeSyEoSw= 
+ZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANUgBLgZEjiW3Lmj +GlCu17h2ERYc75YX+7esbw/iMWMrcrXfvNNutJ4H7hOsV81ZpMEQouHRfog8Inra +KCuwmb28DLhJZJlpE49hZxeUJws4yN8VCCTgr/tLtTsvsSS+TGA58B29HHaglJwY +0w2mOlOVcUxOkjne4VHokLOomhzzqcqTLCjwUslZqRn+SDgXyRw9P5reJ/m6E36d +FyBeVglZvealVp4uK/TTqVVFJYBljD22M9wrOeo9AIvrru5VlgDNHu+rwqgSeE/b +GcwhX2J9J++lwOxsFDR7OILY33yoD7/6NN61SJEoRh7xZTQkr/Dc3HpvjYyj4Yag +dWKq2xDZzIxH+QfEMQssuFvulq5L76TJpXB3ceyCu2NEvb/563p1EvXQbp5Zjz7O +jo1elobs9epvfbCmiANmTxG8GLKteLXfjcph492gdIw0nsV9/bIzE5C7lnff4nEU +9E/uEJz0FTw61VjcZPtpqEzLE/8abBU48pTj5HqKo8Nsjx9hPl6trO4h81yMaqwb +QDmza1KsU+CPIFiycyv8Hn4w6JEjwnUc08rOoQ3e7HjqLNpn8X6RirVQUrwSU7L8 +RTKeOCOBLg6AMXfH/frPRnNQjUG/Z7tBTjTJhm38qucxyHI3J5jwX6vn/jBfdFHf +T6510V0Q9fzgzp3H3fyHpnLW1qxDAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +dr0LKtpOa+Xu9PKwnlsM48/ltph4q9+tsu4CeC8XGoLFNbVIALuZZsKZDehTf+/d +bgEtjW8vVnBGAvVodo1MgCHnhPPensDLfyggAULT2X400cly+suGbKeu3kIOlKCs +TQsFdNKOPm17NcpuM1wTik2UT2EWLdzZ25Wy3Coid+ILrf5YZ75djqtxZlYbRiw4 +4IndIjN0bYsn8l6Z8Pt5HdJ1nQnbDZhQrx6FXWZ3eSSmpklfl4O07z0KlXi1Nmaf +OiVcOMvZUnM8pYmNvul8Jus/XmP8x3jSbYzJDNOJ3YV8+OD8DVG3pLM8U1FmjCZ7 +KiR5DNSxZFpHGXhUqDpTrhLgoqGK9chOqPdzU7Mp4taEO9FV8Goc7BCeOKB3Znxb +XDIszs0oBIHO/tsqUwEcWBI0vjyC2pBYQAYK++qwwmvbfWg5lrb7eH1ZO42DU9QD +AVR/5luxImAA11AmSsGf8i+FJ3F63PzSr0uUG7BnTLC03xna7dPdKXS/pGojNVBT +Q5A5J0rB3+4L2mZLE3mjst3t1xHfLW/0RVRqGwz0QUIloZkO6wPN6Jz6l5Q+TgCY +uEks1YN/qlwjHwI3ycT+Hr/sY5igT0OAySo7qa7lN13qTiO2z7eAMDgafNnq34kJ +4OQDCE28Bni0fFRIaqVCqTU31Kei5jbORif2wK81Zmw= -----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client3-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/client3-cert.pem index 848ecd9492d..376c85ab8f7 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client3-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client3-cert.pem @@ -1,30 +1,30 @@ -----BEGIN CERTIFICATE----- -MIIFMDCCAxgCFDtXgZV+Jd7/OrySQd+e1dVblQe/MA0GCSqGSIb3DQEBCwUAMFIx 
+MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRvMA0GCSqGSIb3DQEBCwUAMFIx CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl -cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTIyMDgwODE3MDU0 -OVoXDTMyMDgwNTE3MDU0OVowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTI0MDYyNjEwMjUw +NFoXDTM0MDYyNDEwMjUwNFowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE -AwwHY2xpZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAKHIdR/Q -waHkIn5z3cc+HNtMfHcKgVGzVDTobggUpWSwVUwa0DMq3OTcNrEnT6zZyUrrv1n1 -6aykGPjRzJ+SZX1ohu8X4EHssBOGaNXUH/AYyfOKMLMGN9AU7dQzNKjuJGkbBXsf -CtmQbQ+/ThMhE4X2bNxEULLudGEfKKQ09LZaqVjhhdVCbuOhx4SPMDJB58Ko10HQ -t7Mh1B3nUTJXFherTr5bcsazQhC6e5i5ySlBnJDnsa9+DMzopiUqMrqqb84WuMKs -zzqjlZXDxTRkAX7nGEU2wjGCx+moPaGLGZs2f1VjBOR7HoNGMAEhmHQhWsIgBCsZ -nDHgAc327Gz1xqsjVd/HrrywJyZfO7ZhxMdmF6MH7eRQjfZGe0+Oajc/7EtFWXg7 -fWdafJ38HGoO8hVjlthKfAFxM5NWjvS7I06IQRwMGGApP5bx3uFmUUixtl/FLa6t -jRKfzaXbX8b0p8HUMbeyvQAZemw+vA+nuKir3DtNIrpqfeXraCMUiEpI8fCRm29S -BvfEsDXCZxBje+nma8g27po8vCaHST+8sjwnNeiW4w6NpQbqqmnvzpf2ivm1U2su -2H1E0EA58zrUoKD13BQzFjccgwodlyutUfk0xYQLrRMOqggtMhsjFDIuNegnPgTH -t7DSyAAg9H0QBXlrd9Ic/OiFMLsb3bu6eeu/AgMBAAEwDQYJKoZIhvcNAQELBQAD -ggIBAHeimTo5afyFhpaH30D9j3EXXExt482nSCPZQbYm+taPVxEiJ4vAs9pa032S -LnA2CC4D74K2Ykd+B/mDGgT5lVpnWuP9VL3wpRErRy6TgkYAJwsEnRLGltNhbuT1 -lup3J4dFgR3tOgwxohjY9FlauZBA5Wu1neZDxXK9UTeAmP0HOb8iXh/goXEvmPLA -HAVHmCrSD0lgEpgB6mg72fb0AkPQq1wlzVBbVtaVgByQP561WmGW6eHO7sqwcO/a -/0Fhd299ChMdnzbHToRt6VFET+oEiCOwF+yEQBRWbjPjCjG+6nYHJh6FxE2ABtEr -Ebr3/7//Q6C8uD32swxXjZaCPEtBC0NNoDW5yi2D7xNHyc+4XHJnRo/v2rPry1RI -Bbwepp2aaCrs38uxut/qXka2xRTyDCimDezJFPxTigJoJ9CgxGTQeJe0R0d5uzlJ -FBtIdyJf6HDKzxNJqB0+wJTYiIiSl0VFPtBYJynMXA82SJuyvCMVgqj+uK4xBr51 -APqdWJR6nBoHaFURD105KiQRM9EVHrbnE38xn4DRN3STeKUlEP94zb3fo3UexJVE -+MWWqNJRdMtUE9j1LRX/P1So4c7BeFp0op0CxJrpXlRmRcWV5lBYhK+WtT8oiZHf -SVSJ8Chol77vm1gVVbJVHIrrH3cfWefv/2Y5fpwuQg6yk/u6 
+AwwHY2xpZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAPrdk2YZ +HRhd4RPfa89z2Ay/TOby545N5n9R+kpQvyhnCbr41afzIU5DrBX7y8cKcvo7y9Dk +Cdd17Xqn4oYliSvVNI8B2nwkBz87BUYT8pNVD+QUc3Jf2P4Wj6XQM4pd9Ntaw7rO +yIf2Oo3Tq58SSjbXYrqAbCYuX4cs/VWLWyq9PapBwxEPeQ756GZS3Xvq1IfCNIKv +NLYRgExctHzkUWPf6WAS0lKydBcCobYvSMfEzPkbBlIhuDRdMmzuji4FefODY4lI +zvnaD7+IIiRC4+IY9xNhdH6s0UobpSIqLCSxOJNYwqhUQt6gNAO1mdJhUirV/XIl +xG5nCGbQS77yeoBLIBEL1t7tpo3/AdEzkR+/wS11dSpcllSj+7BJaKBhTKVZrX1i +gMqUSbiTF1Et9PnAkS1jtUy1w3Ja7FyPWfM8nt/K6vfNRudg/xwY0iY1RFdCXuMw +kPZSr4W+QryGaPqm0RlpCpLHZBOxBDpf0ZcA37ullh5hjXsn5CRw/0ZYpanqrkrq +2cVZLnY98IrJI1QfQhHlDqUP7prR4Omk8C7edXBqQqE/0mqL7AEhppOyLedLFC7W +wUBepmc1bNH+Ho11CZeSdTZfIgwAcD3v6MiMA5kMTRcW6HAHNS309zNJeDf3Eesz +TBXOSCqNBBbk+oW8bxkTLRdHRgdlLT7N6qzLAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggIBAADJZ+I3CJs6E9U2RjIzi1lMo2sYgdcKJS5+yWW8CNjB+DibKfkWjgWvq2K0 +i3hT0Uc6y+ter4OOeIkGtofOiUPekaZsQkPpi73sabwhDVnlki9QL9Ayrd1qDX82 +fMM5roL7w/a+YdKzTQE9hiwPoQhrpj/2mhu7LeYhidSqwzH1anU5YtTKHq3ZrdGN +imhnklcmbqfcNQU0K2l2bu5vuJXFs/v5FCp72ux2p6QDPWwMbwvr413wibt8o7ZT +bBGsQ1MtfJynRVwLGLosn+2t3NPJTfjd4dMEsZhkDY0EX4vbE1/X+K09EN7jPOHe +aJ2AOt3cO3A2EHCR3Dbmt055C6Lb/YR6s05dX4lBT8zY0knsWSL3R77kQoa3+7oR +hU46ydU6K/Kt67nO938WBvFgI81IatRVKVRsXfTIP2oEa0TkwzuvS7nzj3czNU8o +EOa9ixawVYRlEkcuE4KE7x3TcLEGa1gYJDGbsXAfJct1Hur1SJ/rTDwZvlc+qp3o +wWOLtN0mVHEH1OaGlWmeeTuRG16CuTcku2DYiqeuRNy5eZddSuMOag/DKnIN5ZqV +s1GNrpnxPCxd/KFKtdGl+l++Bc9dBmkd+r1dJ/kRGvhul77Zm2xEnGdyybIs64iQ +gvXq8d8ohbZOPxswiFo3p8fbBjWjv0qm3UnlU3P4B3RDMrrC -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client3-key.pem b/tests/integration/test_ssl_cert_authentication/certs/client3-key.pem index 9807809578f..f88456215fe 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client3-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client3-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQChyHUf0MGh5CJ+ 
-c93HPhzbTHx3CoFRs1Q06G4IFKVksFVMGtAzKtzk3DaxJ0+s2clK679Z9emspBj4 -0cyfkmV9aIbvF+BB7LAThmjV1B/wGMnzijCzBjfQFO3UMzSo7iRpGwV7HwrZkG0P -v04TIROF9mzcRFCy7nRhHyikNPS2WqlY4YXVQm7joceEjzAyQefCqNdB0LezIdQd -51EyVxYXq06+W3LGs0IQunuYuckpQZyQ57GvfgzM6KYlKjK6qm/OFrjCrM86o5WV -w8U0ZAF+5xhFNsIxgsfpqD2hixmbNn9VYwTkex6DRjABIZh0IVrCIAQrGZwx4AHN -9uxs9carI1Xfx668sCcmXzu2YcTHZhejB+3kUI32RntPjmo3P+xLRVl4O31nWnyd -/BxqDvIVY5bYSnwBcTOTVo70uyNOiEEcDBhgKT+W8d7hZlFIsbZfxS2urY0Sn82l -21/G9KfB1DG3sr0AGXpsPrwPp7ioq9w7TSK6an3l62gjFIhKSPHwkZtvUgb3xLA1 -wmcQY3vp5mvINu6aPLwmh0k/vLI8JzXoluMOjaUG6qpp786X9or5tVNrLth9RNBA -OfM61KCg9dwUMxY3HIMKHZcrrVH5NMWEC60TDqoILTIbIxQyLjXoJz4Ex7ew0sgA -IPR9EAV5a3fSHPzohTC7G927unnrvwIDAQABAoICAB52pRtXD0cBMr+V6MJuPzfK -GLu/picwud/2jlWGMbiafY1FlUO1Q//yOXg1O0sEfWNWreLuK9Ce27bqpnm7stGg -/5uA5vDy9RrQGeh9t3/Z4xkxQcdMGfFcJ4ZoF/fhU8jY1jjfWQcrq2WmM7jlZww4 -ITp+wKVYwmBRYjE9aYD25GGAoddM2Z2SZoPfBIfKIi5k5ZeWiii4a30wA/mTLW1K -jBrLFHs24O9OdhyFi0vx913PywUNGrLw8ewYnJHKqx/m5mZ97M2RZQbc5MLfO+rd -8BOEYp+5DRTB4c0L6MfxxJta+F5fkpjMfBiSb2caqsDYD4DgIym/EQfsAYvo4b4m -R42rfg5SEiLV16K3ePA2lEHWGmgzsY7PZzmJUGtytHd+NrHMBWloGp0Jke3LV30g -+3WnvXX+/MZ/dtH2/hy1qpZelZR32h1fchSnKqGUnSBcfMjP1YQY6FrTqNxnAxbx -ufLve41IN0+rCScZ/bp9FEd63DfdAi+U+RKSYjF5promG9ao0u85Yw8iF0maTbEn -oOS7759oGi6Y8udWuAXqmbo1JRw7ZIVKiIU09cR8/IgnGBgOO6/xSOrY+nUCzFvz -gVYqOSWpqBilrZF70GUy8iRfPWUAen5Zm58UvyYmByn0xV/aUKyKKcxzfGeQ1JH/ -NCdITbwdLyVhqiKuw9l5AoIBAQDTiXYWOjyxkXQMlYczi/w8pRaCvYMNJGzM2qmP -hn9ChIoi2STv4DuqYUmbW6irT/mtcaTA8EPwC10gpj0FWXtM9YYVhnoXl7rfEZUy -oYT9DFRGx25cR6krqg+sxqV22He8Ld618jn6MISrQS7Zd3rLx9goQXeTkLfD7Zdz -fwduSTMKBFp6zQdBlWf6PhO0vu0Cjhhb0Mr9pTYOYMmZhuIKHeuGU2kZT+Myxn0f -+mlkEBQ5bGr0h9Su+ROvU0vXMmr7SklKbLk1TZDPkduqfub40N63aapto3aVMjUg -ff1urZh5/wiAgKzuLcucLapAWCkdhaoLa5mDyug/yRLs8vRpAoIBAQDDyczSH5CO -deWarOTcREWeFc0Up4AD6EbVZBiMDuIMEtT2hMO3gLiF+/XvEptBO46zMp7LK1hY -E7IP1f87OiH7rOtQTVtsVD5UWIYQaikvCG8OOtOM77zb/OyNYx7mp1Ne2ZuoPKgm -pAhIhdFw9XYiEsdBTnI541htujhpnx+x8Yb8H7CpQ7WR29KPd2sytb0KqFWcEN49 
-fYyC4DhI1Y86VzuU3T6SLdBqtq0FtA9QpXYvoRvfqoK0wMb/k7m7ptld65fIQ0On -9pnJCEk63msWSVxPbJzwfQOgEeP9f+Blg8S65kn2kb7DvwD+SAfhtjItb5RLbJpC -MD0eND8RtDnnAoIBAQCyn6SjaFHP7277IVilYlOAMjcr9zMuac+lvA7qLzxOwvSS -MKJMHCDF4pjxIxjub/35Q7lHYps7m5zr8PQeDE1d0wWCL6fxPCKEMqi1tEZEF6Ei -k0zRh4GGaXgLAUK/dFLHCETDhuMGLOoaYlRZNdbvNLWGaKG8bbt/KqV01ZNEwXLj -xoFQqMizzKGcYcbqHT6tpadUAJ8oR9W5lmklxwwgVtuG9fANe7PyMEGAO0e7QwiQ -5Mf77KbfiVvh9IhaEyzbPQNeYSiTpgfd/uBqu/X1rQoj/on3Qszjdx39beYPC29x -tLVNLVrZVLpWCwl0g/1T/IZ2VkmvTCuJkRT2GMHhAoIBAQCXaOZG0TY/lZk8ptxO -I1YbTQzOHkL3wpeUytXY6mGRqLac9ktNC/SnWRT2D9OU/PP8TPdUc27cnk6jxICf -UvOY6D8KWOiMvBF/tP3oj9DNmJ4ZyRB0+6l4Dv740GDDSz9EKNEhp9b6Gvx06Vfx -HUKAUzlYncvkTJbENLEPrYkbWYdkTzWVkNFHvVH2tQlnq/hH16ptCPZ1YzRYugGN -AerD2VYwZ3DOJzP4ctEXigoV+f+OEe+2Zuyx0CuU1q9aGUwcP+efbbVSnXiMaSYI -qpzyHUWP/pTXvjYhgfRoxI3Ks75eM12bm1aFlp60BwxhVz8yuAlc0t3wtdFuHEVx -8YidAoIBAQCQYRslpvkKLynGY18m4oViMjaCvt27fepngAuuVFEZ9cJZeWY8GMcc -IJeq9qTtlMuiWHnq8oGVCL6BnFX6BIMT0W1knCSRqHcwjQByZ51X2Bo60TktdPOD -c6lILIdkYCFLs7fXv0xUZihyIIdYaxx3XpeNwgaqM+wFsbSclF3U4cdwm5U+ltOZ -L/3w1rFlyH/+ZWIItBC8N9pHD4bayiavHT99E+35Vgtol5jqIhKsiGVgoj3tdDW1 -+xBdyrg6JLXHFP/vobY5mvLLGdcwCXEd/b+jUK9Uhdbq2VC27XEllIwGpPK1SnNU -7tLJO1z1/eDbntbQC4cvewqNRYhwlnWe +MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQD63ZNmGR0YXeET +32vPc9gMv0zm8ueOTeZ/UfpKUL8oZwm6+NWn8yFOQ6wV+8vHCnL6O8vQ5AnXde16 +p+KGJYkr1TSPAdp8JAc/OwVGE/KTVQ/kFHNyX9j+Fo+l0DOKXfTbWsO6zsiH9jqN +06ufEko212K6gGwmLl+HLP1Vi1sqvT2qQcMRD3kO+ehmUt176tSHwjSCrzS2EYBM +XLR85FFj3+lgEtJSsnQXAqG2L0jHxMz5GwZSIbg0XTJs7o4uBXnzg2OJSM752g+/ +iCIkQuPiGPcTYXR+rNFKG6UiKiwksTiTWMKoVELeoDQDtZnSYVIq1f1yJcRuZwhm +0Eu+8nqASyARC9be7aaN/wHRM5Efv8EtdXUqXJZUo/uwSWigYUylWa19YoDKlEm4 +kxdRLfT5wJEtY7VMtcNyWuxcj1nzPJ7fyur3zUbnYP8cGNImNURXQl7jMJD2Uq+F +vkK8hmj6ptEZaQqSx2QTsQQ6X9GXAN+7pZYeYY17J+QkcP9GWKWp6q5K6tnFWS52 +PfCKySNUH0IR5Q6lD+6a0eDppPAu3nVwakKhP9Jqi+wBIaaTsi3nSxQu1sFAXqZn +NWzR/h6NdQmXknU2XyIMAHA97+jIjAOZDE0XFuhwBzUt9PczSXg39xHrM0wVzkgq 
+jQQW5PqFvG8ZEy0XR0YHZS0+zeqsywIDAQABAoICAQDAMTs48CqTPXEvyW6OS+EM +uw7OrO/r3RCnIIYRo1UgPfh9byA5AJLWpA/V88eF4SJ/RYp7qglEMcvTuYVZYq55 +j2kp2rCphOysa6o5qxSf/X4kLerYiEf1OhGpZh3mdt8doqbrmnqVd3YarD0CrH+B +DnhMDBFPGx4CsNwRSqd40ezJYIJyspj7eUisA/Y9doaGz6ltKY/HoRba6fc4667T +RntEKIdL5f38lv6PViB7M/IZMrQf/kdijrgQLp9s8LMiddmvFsHDN2XzRfdqMnjm +AlxgU7xtRDc/gHh9+TNClSeT81+GmK92YeQXp2yGehr6SGFYr0iTkIomQpSVYK2p +0haIIjQMHlc7E6WVkDELdpAxERgvV4uDN9iEkd4t9oNDPPRioPJQ4bhbMSxCO+CP +NdFHTxIbaDr39OdgqNNE14j7WJsFaCsYXH2NFF8jvwIkPQ3QVMQT/JPGStkyF+9P +5IjFfQ9aEF2i4mAVYiG0DE3NyD/OOI9/uF05POn15H9U+bA9hfBE0Rtm9nMqfVy+ +zgmajXkVb0jTHdL2t/UKv0YdgaglvDcWGFdEUskjJoB00NJwBGorSvcMZiSTxpLD +cGRqywRHOEqNIAbKv0Dt2AX5ZdBSQu7/z1/5Jcdmx8vp9lVhQKeMzYxsFKE4V7fr +ztDuPOlFGyffxpRenBIxUQKCAQEA/XVyoOW1cSFqeG46mjw+dbwjqRmLtEVhAMsG +TtW8pnMJHZ8u7lfM/UJyMN4NQEPJElrABns6I3dqPOwaKOy1a2leHMg5Vvd0/uqp +s5a2fduP6l9PXvhhWDN2sChbeKhl0jJDVnaTO7tiye8ZGMYOM/AlfQX/+PY4QNgd +O7UwcLKhoytxtPtHFZTOZp+cECdTvlmX9lZoNEzFp0nfzFaLVwDsy0B9e6KGt1xJ +fV3Drw7p7PeUyYBNKkyCRVee5S/pn5fT7pkIxMHvaL9BBnWVpwiH3Vi0hfTfFZk4 +8tLcVZgf3n0Y4dMVP2VQRF+kKBTL0coLne36HksQEJyk/4KZEwKCAQEA/WF4z6kc +YXwsU5847+ywq4ipq9efadkMDaGzI6Ez06TQjRYNsZGplCV9fiGxKX2YmZyFzjTf +4joqOmI6UANk+JZKW0Eyyak/TnxugrjMFq8WnK64cIz1TK054tAM/bHGkavaYb8K +bCfbKmaSkwkTbb/OasbQqsC7jbALdbM6Ae0PMrpPmI90YYIMYLRogIaBqCkB43vp +GEZN2VeNS7blhRMiq7YBDXn807aSMQ0+skNSQ7MA8F5i4BFvWyPb1nKZWux1RWLZ +O23IxGWmoGho1CAaEk55LXbqLygU5ZYlBSqkrP9N/elJykOp0LwpjoYBgjMPmanz +o6jy8XIUP78MaQKCAQEAi8+YjqaHosMTDyGG1AN9VMaWSTYdOTC4JI7ZiO0f5hU4 +pw1i/viRy/Y2NTyXxKZfqO9EU47v8BZ0FO0MNRz1qi1yS6Aq+Q0BjYh2Wek9+0j9 +JwSyLKoIUHX694sbggAqQnuVZ4F7EAz6nnd0uZSuyvmiREfl/jgbqbFM1t3IvbHb +tb1GONYPTRlLjZJnrQV0jWCwkaLyUj8zHGeEuxvWOwT4mdmWHnf1pfmTVEM/qTYp +1Zxwh4JtjnKrvYJq1PPMBEvlDQ1/p8FuxbISNXTxOzVadL/0vJvp3ukpX9Du14xV +sA4DhrZAVzsUvtKfI7jtAWlZZSGbwdAYKYGvBn7M3wKCAQAHZWX6YcxTSCWfF0G5 +NyZ9C1Mwke20UEKaz0KEYrs5jVENHTyvFzpk+actHFyogmMG8NuzBjYWy23aIG3l +UgQLgY+QFFogKtGPP/CV3kEO1HOLhUoa9vJeF5xd84a9jQfnzqVkPwhV2d/6392d 
+byFjDbs/wKfspA2VeDMNb3rc/Yd5CpkyMdXK1tn3pKx8O/Di8Ld+ZWqLa9nv4y9b +q24NsV5MttZXB12K7IRd7C4NVAu9sCbx3T9znO6sMWLEYrn5Pne528XNh0nZ+cGg +YwvUTU+VgzbkTdlOIRRjEzvnZ7RA3H7xT3L49XqqfiOUZnL60vS8nopfF5pn09Wl +erUpAoIBAQDWHJQT+Jvj+dXPC42oIRQKCiYtp1buM8YyL+dJNi7p73brF+2Oqx3k +XNT5eP9GthGqpVGJ732FWJDbPViuZB12zlx9tpGF3ghQTq3p/95KOhGEb2fG7mnl +bEcPqOoFEsAlc4DZYqsDDUvmsifimKm20ZWi4VjTqQJUHYCegJsjrA7D4obGbOxX +FujRMq7/idXRjEoWLloQTMPAQ0Uu4Omwnea25daRzrrrJ34uYrTO1sNgOKk9JAem +rGgrOzsRVG1aNwddcZT/t/icLKS25G7AszLnrNFxJB7DRAfgzpHkJBwNLpcPVtfR +KB6GTGRi7uqGHYScU6+wRMHjdVzdKGNM -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client3-req.pem b/tests/integration/test_ssl_cert_authentication/certs/client3-req.pem index a2b19bf835b..7c679b4b367 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client3-req.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client3-req.pem @@ -1,27 +1,27 @@ -----BEGIN CERTIFICATE REQUEST----- MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp -ZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAKHIdR/QwaHkIn5z -3cc+HNtMfHcKgVGzVDTobggUpWSwVUwa0DMq3OTcNrEnT6zZyUrrv1n16aykGPjR -zJ+SZX1ohu8X4EHssBOGaNXUH/AYyfOKMLMGN9AU7dQzNKjuJGkbBXsfCtmQbQ+/ -ThMhE4X2bNxEULLudGEfKKQ09LZaqVjhhdVCbuOhx4SPMDJB58Ko10HQt7Mh1B3n -UTJXFherTr5bcsazQhC6e5i5ySlBnJDnsa9+DMzopiUqMrqqb84WuMKszzqjlZXD -xTRkAX7nGEU2wjGCx+moPaGLGZs2f1VjBOR7HoNGMAEhmHQhWsIgBCsZnDHgAc32 -7Gz1xqsjVd/HrrywJyZfO7ZhxMdmF6MH7eRQjfZGe0+Oajc/7EtFWXg7fWdafJ38 -HGoO8hVjlthKfAFxM5NWjvS7I06IQRwMGGApP5bx3uFmUUixtl/FLa6tjRKfzaXb -X8b0p8HUMbeyvQAZemw+vA+nuKir3DtNIrpqfeXraCMUiEpI8fCRm29SBvfEsDXC -ZxBje+nma8g27po8vCaHST+8sjwnNeiW4w6NpQbqqmnvzpf2ivm1U2su2H1E0EA5 -8zrUoKD13BQzFjccgwodlyutUfk0xYQLrRMOqggtMhsjFDIuNegnPgTHt7DSyAAg -9H0QBXlrd9Ic/OiFMLsb3bu6eeu/AgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA -f8vcJzjwqm2yUx1gYOt/BzfZ4+VNvP5CSIBxRAkT4judk4Wz07Pb1dQT351HcU8X 
-+pJ55HfIgUFyOSR1gKGJSV1HsREqYyaJV2KeBQM+klEeagYR+0Dt8R7NYTRtqUmV -lyoW7eHlUsbvUa0jCiwOK/t7WDr9qH4ZUKxVYSpJNa8FO058SoUcCRFue5TnTxF8 -tHH+J+kzcagcS0Rk5CCFWCtNE8+0FdfUs8IUYaV8cw8PEqdfrfJ2f/Zj0I37rh9P -pjuqe+GGPp7hv29YJ4bRd5TSe05vol2g+LYx2JNe1sr+NnGZVDVolsTg50cEwBo9 -gLW0ea/4Y+OoOAqFOdVM+RvfEbgpsT0LpHZAKXfiGi1PAMzZ0bJcOH8F77mV7OcR -qNcshdM1LkMSojGvoVQrRP/Bz0CVjSpwBcmkGiehESkaxNNsUyQBla84v0GDvuL6 -cA6NDfl8iPz5W3kk+2fypgO7sw0FXQVKjq63gz4XAQsGP8JzF1cC4fDnoRRsHO5E -UdWE98/AnVZ7mQ5bC11TAuDyzKGh1FNjrYFmsvTnMIWo3Ef5Tc5GXfYC+fVryfDf -BAbw71FprzMMFoAIxiSCPzK6y/am7BdGM5IZN09V4BBMg8QwZiXtzXWH5JX5PKm2 -f15IkScIvUliS0RepLfI0CXcFuzpJKi7eHLqca0cli8= +ZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAPrdk2YZHRhd4RPf +a89z2Ay/TOby545N5n9R+kpQvyhnCbr41afzIU5DrBX7y8cKcvo7y9DkCdd17Xqn +4oYliSvVNI8B2nwkBz87BUYT8pNVD+QUc3Jf2P4Wj6XQM4pd9Ntaw7rOyIf2Oo3T +q58SSjbXYrqAbCYuX4cs/VWLWyq9PapBwxEPeQ756GZS3Xvq1IfCNIKvNLYRgExc +tHzkUWPf6WAS0lKydBcCobYvSMfEzPkbBlIhuDRdMmzuji4FefODY4lIzvnaD7+I +IiRC4+IY9xNhdH6s0UobpSIqLCSxOJNYwqhUQt6gNAO1mdJhUirV/XIlxG5nCGbQ +S77yeoBLIBEL1t7tpo3/AdEzkR+/wS11dSpcllSj+7BJaKBhTKVZrX1igMqUSbiT +F1Et9PnAkS1jtUy1w3Ja7FyPWfM8nt/K6vfNRudg/xwY0iY1RFdCXuMwkPZSr4W+ +QryGaPqm0RlpCpLHZBOxBDpf0ZcA37ullh5hjXsn5CRw/0ZYpanqrkrq2cVZLnY9 +8IrJI1QfQhHlDqUP7prR4Omk8C7edXBqQqE/0mqL7AEhppOyLedLFC7WwUBepmc1 +bNH+Ho11CZeSdTZfIgwAcD3v6MiMA5kMTRcW6HAHNS309zNJeDf3EeszTBXOSCqN +BBbk+oW8bxkTLRdHRgdlLT7N6qzLAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +WLhSuFZ6pnoDe8LQx6eMPXzRkQb1qsJpyjpegUxFe71o2e23V/1yMnTfFiO+DsBQ +PP8RkLWUKAvkAvqPyttJBx9U5ZYspsSsTVhPsCjUFZ4IG+fc/dVP1ZRid5HQJz2+ +bFf4KPgErZkJZR02Q2q6ZpKq9clRzbDkho56OZXLYI/o2Z4xADbhzpa0xt8sx533 +bm0rKvz85WxH3cimRjKaGKzuKg38ZaXmmUbsigV3dzImT00KDWmMmaW9SB8lIm2R +JToms0Qs+mOr9qD2NiRoiUd1wmgG2QpFDViIqAZKJjjeesmeV2CAcPfLztOZBim4 +6bRIOIXDhYYOyDgs52XuijXUr4BR8aQmqBrjnccCMcGE8Ol5ZH/IDg4pCRSduCWe +T7ThhH7BpAWYdgF3ITcp5oEcpXK8IdAMAst1/6vk7Z1JHIOejxksbLsGDYkaLM6w +yTn4X3Ak0X6bVmLAY+xAL/WjAJhVtDPqGYAmpx4iQ6QjYG/8gRdOiUI8H7MCK8+h 
+P0auhyyMmO+kdhNnzwuX/eeLXZfNvnyK4n2uHWYgwV5I+Kv282zw94UIQgwVQ2DN +/IbXD7K57s7+ff9Eff8L/B8rt1i1cmv01mEgQ4kMsLOClGaceGcz/ivfzDCosmsk +Xg/zVmdunUY0lswYL4SQM3BhWB3xJ4likHikfQHklM4= -----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client4-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/client4-cert.pem new file mode 100644 index 00000000000..5eae58da627 --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client4-cert.pem @@ -0,0 +1,31 @@ +-----BEGIN CERTIFICATE----- +MIIFWjCCA0KgAwIBAgIUBfEMZ1Z/4weV13ryVA9qyNTPJHAwDQYJKoZIhvcNAQEL +BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwNjI2 +MTAyNTA0WhcNMzQwNjI0MTAyNTA0WjBXMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMRAw +DgYDVQQDDAdjbGllbnQ0MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA +353z74lGXkwEc1n6r/M0FS1XTXhoVGMYIqK7HPCBOEGLeyyGwGfSQ7lRfT1xSkii +zBGG0Nod9cRT1CAewOSJ6BjVfkQcGEjlnVYm42nD6PMd9iFJj9Y5atPeFNvvr+wF +OFX+E8FRu8u9aEu7MIj+KCqoqBukFhFgJYX8sMbRROfLOPaCq0cSC+Vod4qR83+W +ITrQ5n8+/CC39uLY/oKgAKdVnmff595Uy76BVdYzuit1IRKwJxqIWMRrfNI+szmS +hdj0AHwgmwEGCaTNcOQyqvBLxW6qB5tc1FyV4LYv4iNftroqNQvlUbJ4UqVr55Fh +vZ38C1BQ4sWgo6FSS/B6u13clwpRzDh3H8tOMTTz1inUtg61Y49p2G8k3kNVH+QU +fRM4xvCkhFzIArgSiJ+/YUKboltSG5K28pegkk8RRMsaQK8g+NScKKu7/8ddRGE8 +454AqxPpzASij+djM0vxzgad6BB4e+iIVdj77NSjAxVAfg9GIjNHG1DZ87jLLgtk +SN2jaYsBRBRnmenslEGDtwO1SeWrzzicVfP9GRdiMLJwCkwv5P5hCzwrCB5eUPhm +tGHm4K8eXDAd+Ol9pKMySC79E5/W372wdbaO1fcAUKvpHhcRZnSusNAJkLGJYCkV +2gzTWlaeX4rGqjNVs4MSmNuMT+a0IafZeZivptxdgLkCAwEAAaMjMCEwHwYDVR0R +BBgwFoYUc3BpZmZlOi8vZm9vLmNvbS9iYXIwDQYJKoZIhvcNAQELBQADggIBAFox +3myVDr9yJkrF5+vB9gUlTv14JIPRd0OFCLcPOlHpvYKEwjRjTwT9oL3zU5PoRPX0 +AiD9sL5TOo0zraiFPUi1k5X6SoW/qU/kOJ/j5CgfChyyyit/V773LitM/cVXZGui +YX32V1zV9+RaCowC/16oHvfjMA8xNOYoYW83FgQ3GrKgRuqqVMT7JAHoDebVSqyb 
+w5W0G7RH3hHM1nCv51tnT1SZDn+qRBcX5faPUVARzdcRrZ/VSU2RoVIU/fPPiet8 +5TRioZFslZaFDWOLOuP0ZcOj5MsY3vQZtx2/NRgNc+iLF593YBUhRJYqfT5ePW3H +LwbZp/Rvd2kLucYd/W9WhKEzJKKvzm1V2hCDnh5dl32sZgdBzrdKzgyNB723cLR2 +cHFTIEj1Q/scay+iiSoV+VNfMSDQ71vkHqFHNhEqPFUpdF/SeooDFeQaDvYkomgr +Z9BJFtbp4kZRIEuPX+niTi0S/zwi7htiUn17wOIBcydcgG2GXBer5H3JyFnCXM1N +0jFQsuBFRj8xP71xzhN8YjA2Pe+MGYrMWiwaVMLTz8mdQ+Y2aEvOkfXFSaeNUqW3 +GYxAjEkhVCvzhOd6sD3QjLRX2qhwh8NJCJDbkTok66hno8QsHWASbaDiCMG9z7le +ci4dOHzu/buwqS4LVTmFWTn7mkd+FAlSY9Hj0WVI +-----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client4-ext.cnf b/tests/integration/test_ssl_cert_authentication/certs/client4-ext.cnf new file mode 100644 index 00000000000..f8425c275a1 --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client4-ext.cnf @@ -0,0 +1 @@ +subjectAltName=URI:spiffe://foo.com/bar diff --git a/tests/integration/test_ssl_cert_authentication/certs/client4-key.pem b/tests/integration/test_ssl_cert_authentication/certs/client4-key.pem new file mode 100644 index 00000000000..f1f17525a51 --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client4-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQDfnfPviUZeTARz +Wfqv8zQVLVdNeGhUYxgiorsc8IE4QYt7LIbAZ9JDuVF9PXFKSKLMEYbQ2h31xFPU +IB7A5InoGNV+RBwYSOWdVibjacPo8x32IUmP1jlq094U2++v7AU4Vf4TwVG7y71o +S7swiP4oKqioG6QWEWAlhfywxtFE58s49oKrRxIL5Wh3ipHzf5YhOtDmfz78ILf2 +4tj+gqAAp1WeZ9/n3lTLvoFV1jO6K3UhErAnGohYxGt80j6zOZKF2PQAfCCbAQYJ +pM1w5DKq8EvFbqoHm1zUXJXgti/iI1+2uio1C+VRsnhSpWvnkWG9nfwLUFDixaCj +oVJL8Hq7XdyXClHMOHcfy04xNPPWKdS2DrVjj2nYbyTeQ1Uf5BR9EzjG8KSEXMgC +uBKIn79hQpuiW1Ibkrbyl6CSTxFEyxpAryD41Jwoq7v/x11EYTzjngCrE+nMBKKP +52MzS/HOBp3oEHh76IhV2Pvs1KMDFUB+D0YiM0cbUNnzuMsuC2RI3aNpiwFEFGeZ +6eyUQYO3A7VJ5avPOJxV8/0ZF2IwsnAKTC/k/mELPCsIHl5Q+Ga0Yebgrx5cMB34 +6X2kozJILv0Tn9bfvbB1to7V9wBQq+keFxFmdK6w0AmQsYlgKRXaDNNaVp5fisaq 
+M1WzgxKY24xP5rQhp9l5mK+m3F2AuQIDAQABAoICAAQfLTflF971F7/okK5dlUAu +rcVHyuSDTxaUWU6XQEqBKskCcRlq0H1fFRlx4Hy2Cgoo6ItA+fxlugXW8bosfD5C +9ux05O+tqE3WILFgabQJhyvaQTjdggFuFlHcG/bqKs53B0/l6FPF1Z/uhWzHmaez +4Zf3qnadq2AFsDqx73mNrDlIkfAGR1bgy6QocbhDSckjBGa7QbX0BHAQjl9imQBq +FTHuSDpF5to6kLe8UwfDdU0+wvB1lL3OIQ0T8wPqs8Cz1wuLPi6dPjc/SmoiSqzL +8RmaiJfLTVK8wiZ6NTe93y3HELAZoAh5ea5MTkjebSbJmrO6r0L+0Y8ykgnETP7O +Ug9PWeDDE15sNXIQCKtRe3QpHtJaoAJU1MGhNqwm9oKMcuSvBOV8XRuZORinTYRL +Q2ZD7czaT5VZXCQI4uHwE+KIlQF+658c9M9WETxClgUlhbzqig3ilUz3QUweaPvE +tqArjiYLsT2KtrgmsZ2DaDn2IlGSIRjXMZJ3kn6i49C0uhH2YkSZgU9/7kH2myse +3opxE1EbT4ARFWUbGgqXTOc/OSb9DAsxUK2u0eR/4yOMLQJkvxNJWQgwmi6N18iU +WdvTphNtMtmdsAhst9luwNaeJItzTDm7JeWx+MPs8f7PVOOkTz8HcBAvZnISH1Md +0i+0lBrBXbAcRK5X7tvhAoIBAQDwKPQA0uNk4Hemt5yke1jrg4B03OLimHeQ/1PY +I99hThh/RLncYaMxqsd5WkXXbjsWyGidKHYh3/cG9akmgU6D2Z16CFNMRhgBgRX2 ++LJkdS2QSuHPJlB9ERtOOiWFt7IDafB+tMKHE/VRQdxFRtvLe6pQMzP4veVXZsq8 +NNJGAQ8egUa6HDvkXzR2VDf2Kc61t4ZwT4JT6C12GnCfvXobaVkU1aWhcguoX8vI +o3UOkeracEKc/80ZRdFhA/nvPPXCobyjFjLi8WGp6PUySrVhN9eAdZaUBNeFHLdg +8urNvy5Q6mBAByEfHZeJNZbBeAEAw7S5YAxgL96blj2IPOerAoIBAQDuXazpCDXD +dG6XDZ9FS7MKBWriHUa54UgbX4JfQsOOFA8uxglIe5E4IKFSEbetlDORnKZjcmGa +SKTm0MLLi/kKrkhoDgi6HNbmbo9ZmKIhmEwws5L1TLeUdLrWj8GLgNphotOBKs1V +vQQkfh6rzovyFsMj44Xea8Kgx5ONVlB1L5pEepKdIyDRiQfxhwFox719HACSqCEa +06eFNGtUOLLqNMZhpur59bqgiVQtIZKis9juwzZID0svXBElpDrNvbWS1V5MCBOT +6AStW66YkmVWFCn7qQmNqMh4x19GveW8ajgrBSr/8GP/WXiACBDEsunWRORW57iS +KiPmC0uHlMUrAoIBAQCYTrCokRZLlJvtbIb4PY3gFw7xjmCJqn4xw+wNqHpzgI7C +r/hbjsRrrE5DZP/kJ3Fr+n92JAH/a8WDcWrsE5eSwQFBMmR5e/6ffZlLft/MHBBg +cU0SDc9/8chqbS/8xMotphMymDrCZeLvvKAQg2bDftM9d6ufNfdr3bH3eFxery9C +fmQ3hc5qAAMKhFDVWiBRWGn3ckVKJ3Ylb5E7jXQSTFaFgxU+9U/1YYOg5CFJszrJ +e+aTIRuWypOGPnpUwkluPRqgJ2TwTntMwYQ3d+/eDwcp3ek4SHXSYqrd3lERWQzr +niiakqrry92d1BGe8xdXv8Yuxn4yxkkcTUUK0O1vAoIBAQDLY0LW1BqL3B1A5m6w +QhdSxaydoz1l/cP5F1W20tDpulP6JSBmqIkQy0bbMCL6CSq3ZGLVGBQQAUwzZo3Q +AG9PncZKgy8PHux/UncejA5LfBgGtjL++6bpFXEXAzKyRhAQn065ODxcnBucx8CD 
++ImQ17tKNClVz70SUzijsLKWSzfmlm/jhMXMBJCyle+t6EDXL72NZchZi5+1GTU7 +d+Wx0bY0PKji/7luob8hgzQLgEnp8MewVNxiXLyE0c0bIHR+BXGgjoOmAKN9CG3B +4ah1+l6YTXPJW+syo2u4gPA2BKxIiPBX0laA22bmV/t22vKL0dzECpSCo1JeR+T6 +mwZhAoIBAQDpLqRLxfZk2TK3wJ/bloXXsRg4TjSQ4m2Y3htVRiOQF83iERVRlAwg +9yKlyd99ux8tlYAK368Q+FFAwYTvUyghmfVTPARFTmeX0F5u+MX00WOa2FhPs3du ++ImYeQH3hg2O7qyVDMCwtqgIIuGLNwsVPqUUF5bx0He7wTwwzQmx3EVCOu6yZXG7 +Aw3qpOM2VhrtWgGP1mTONiUg5dh4sGbXX70gjG9cUpo/Owr69Q4Y8/OEyx9bzqSW +5BeVN0vONzQC+LHG5EvgNF6yOU7iCkuoDirZUrVchuAf+IDapK85TLIH8bm57LKN +Etg/x+MCoqlEQBVgnY7f3suMB89XerER +-----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client4-req.pem b/tests/integration/test_ssl_cert_authentication/certs/client4-req.pem new file mode 100644 index 00000000000..224484f6611 --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client4-req.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx +ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp +ZW50NDCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAN+d8++JRl5MBHNZ ++q/zNBUtV014aFRjGCKiuxzwgThBi3sshsBn0kO5UX09cUpIoswRhtDaHfXEU9Qg +HsDkiegY1X5EHBhI5Z1WJuNpw+jzHfYhSY/WOWrT3hTb76/sBThV/hPBUbvLvWhL +uzCI/igqqKgbpBYRYCWF/LDG0UTnyzj2gqtHEgvlaHeKkfN/liE60OZ/Pvwgt/bi +2P6CoACnVZ5n3+feVMu+gVXWM7ordSESsCcaiFjEa3zSPrM5koXY9AB8IJsBBgmk +zXDkMqrwS8VuqgebXNRcleC2L+IjX7a6KjUL5VGyeFKla+eRYb2d/AtQUOLFoKOh +Ukvwertd3JcKUcw4dx/LTjE089Yp1LYOtWOPadhvJN5DVR/kFH0TOMbwpIRcyAK4 +Eoifv2FCm6JbUhuStvKXoJJPEUTLGkCvIPjUnCiru//HXURhPOOeAKsT6cwEoo/n +YzNL8c4GnegQeHvoiFXY++zUowMVQH4PRiIzRxtQ2fO4yy4LZEjdo2mLAUQUZ5np +7JRBg7cDtUnlq884nFXz/RkXYjCycApML+T+YQs8KwgeXlD4ZrRh5uCvHlwwHfjp +faSjMkgu/ROf1t+9sHW2jtX3AFCr6R4XEWZ0rrDQCZCxiWApFdoM01pWnl+Kxqoz +VbODEpjbjE/mtCGn2XmYr6bcXYC5AgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +2JVul/xcJ+YlepOHxJ9dczIcXEjjMOBxWuyK+G9/6wASgHg9e2SB+WS1VSeUARC6 +VkID3Jlwr1gEw4gR3lW2h5I21kdCaBfCHshUoOr9rV5uE76r9kxgyEsMUZMvSClC 
+eQd8VK4fUT9JEKigIJeCFT9IE9PyxrdH1xpp89jOLy40t3PkubDi8WR8dvPckg3N +juLU/6EtbrtgFMnCqB2TmH4mc6YSCeENUTvt+nSiBKZUblGDuIxu/edX3SscS0Yv +qPM5LPcNHEGeeMC5ZSfotaSzRP+x3OlV9VJNROG4brbaI+3kECtegBgFvKIiK+JY +m7dkt8oIpQc8CKZkM8Kk6e3JXHzKf4vAiWHf0Wyag3gqCukxdas/PMx/3ROi7iDm +XQN713lxhIjtqfXQZjZcRmYQwdnkaSY+H7hgAyhnavqkBmPLeMU5hffdBswrjH+0 +fD0FOIDOWNM9e2Q/qdtHxtglNUmox0ETvl/3gYRkN1I56zNan6FNzGMubilntt2z +xXQwxP4Jn+RoAwb5U9mIlaLJ73FDbl6KAvFSJHlZl34R/o1nKOOAiFSv4V+RcTMd +x49P7cyAcW+eSsIgDzqabhx1OcrFEFRtBy342w5m5Qdq62TpFmeALgRYhAarA9UZ +YY/XOglN88K/3iR+A5LO7Hdv0Q/wShokghcSdAE3JOo= +-----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/generate_certs.sh b/tests/integration/test_ssl_cert_authentication/certs/generate_certs.sh index d6126d361f5..a09b7b2874e 100755 --- a/tests/integration/test_ssl_cert_authentication/certs/generate_certs.sh +++ b/tests/integration/test_ssl_cert_authentication/certs/generate_certs.sh @@ -13,11 +13,13 @@ openssl x509 -req -days 3650 -in server-req.pem -CA ca-cert.pem -CAkey ca-key.pe openssl req -newkey rsa:4096 -nodes -batch -keyout client1-key.pem -out client1-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client1" openssl req -newkey rsa:4096 -nodes -batch -keyout client2-key.pem -out client2-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client2" openssl req -newkey rsa:4096 -nodes -batch -keyout client3-key.pem -out client3-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client3" +openssl req -newkey rsa:4096 -nodes -batch -keyout client4-key.pem -out client4-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client4" # 5. 
Use CA's private key to sign client's CSR and get back the signed certificate openssl x509 -req -days 3650 -in client1-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client1-cert.pem openssl x509 -req -days 3650 -in client2-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client2-cert.pem openssl x509 -req -days 3650 -in client3-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client3-cert.pem +openssl x509 -req -days 3650 -in client4-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -extfile client4-ext.cnf -out client4-cert.pem # 6. Generate one more self-signed certificate and private key for using as wrong certificate (because it's not signed by CA) openssl req -newkey rsa:4096 -x509 -days 3650 -nodes -batch -keyout wrong-key.pem -out wrong-cert.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client" diff --git a/tests/integration/test_ssl_cert_authentication/certs/server-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/server-cert.pem index 53ee3185b2a..073c6485bd2 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/server-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/server-cert.pem @@ -1,33 +1,31 @@ -----BEGIN CERTIFICATE----- -MIIFpTCCA42gAwIBAgIUf7oSjl262zqxycxCt9R08BG75GYwDQYJKoZIhvcNAQEL +MIIFZTCCA02gAwIBAgIUBfEMZ1Z/4weV13ryVA9qyNTPJGwwDQYJKoZIhvcNAQEL BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM -GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjIwODA4 -MTcwNTQyWhcNMzIwODA1MTcwNTQyWjBWMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwNjI2 +MTAyNTAxWhcNMzQwNjI0MTAyNTAxWjBWMQswCQYDVQQGEwJSVTETMBEGA1UECAwK U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMQ8w -DQYDVQQDDAZzZXJ2ZXIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQC+ -6f15rjbEl5DanEWi3YsMIPNwQ08waUrqFz3aCKeGcRujKb7uaX/I5LWdh8x9I++E -E2ccFlnJTd2dZKxjY9jd2pRXVHmVE7XLpl8qtlLtLjlJ889G3tbgwdLZqaClxJxH 
-0KQHH5wUwweqlfbteXeYUAIfhDRmoOL2qhUPLYi7E5/vpMeEL+tMn6fswuk7JIR5 -64NLUVC1/VXtjDli0YiIhE025iWL7FukUAifJKa/kYEAPen71bZcpT3uxYnALClf -rANg8uJ+DTDPeNjZbLjNTPRfqk4W7pwG8w5F6RhrMPwgqvBNrnne7OtCD2lzlFU/ -sQiBM8HA5gN2CJuDjl+F70KTY3nRkAxpmHvSl6RPTEx4egdcb69A85LmDjyV+S01 -8tJQgi8TEmXM+TadQo3Xz/6+MHBIr7MknRRs0l2wCCiNvkE5MxsT+Pv182wxGArF -aIvCU4dZI6bI2WZbYclR41Ud5O6XhQKYu2AhS7zY1+nhAMAvqyD5OmIIjKWi3GHP -vdSCljBQzs1tuHgTaKIeTTzFetnRDzyoRDbLbo/OhKEAybvIEqq5iXUmfz4RG+Am -ZUdxBZ6muueAf44mVQBKx/OB1BIzObaKy5s2gP+c486WR15lqVcu6FkPf8tO15c2 -lfpDXE5IvRBeSgi4vVdL4ceJtrfMavd9pa3YmGk+zwIDAQABo28wbTArBgNVHREE -JDAigiBpbnRlZ3JhdGlvbi10ZXN0cy5jbGlja2hvdXNlLmNvbTAdBgNVHQ4EFgQU -eebevYkmCqTDsOG16U3lTCIvqNcwHwYDVR0jBBgwFoAUT/2qvvJBk3zEQJj4KzWm -hB5HShMwDQYJKoZIhvcNAQELBQADggIBALVfgo+K+SHzNrerNVJI8U/50fi3WPsS -rsorNxYHaapTJB7/Ngn8Nv05YI1Er4Npl9X/9LjA0uwxYmW6zPGGoSVqGTXJD50o -2c2MVXrx3ZjkNLNw8OoIROU/JwFZRANYS9ECZVOYY2eHcci4S1D7izZP/7+8V+YV -l49Do+ht1nkpeOadWXsQDPZO3bVbUvkvuYYeaHGzULKWS4sHbXmFizrL3V4XeuyR -SAwAo1sRkYcJP5JUk8JCDW+5XFtk/X638RfZJ/9hGxW2gbX9T/Mgqsmi4TMajeNn -VWkq7+WmU4v9TKZARA0240CIiwmW95KVMYe5rWEB3i9yo0c4kei9H113q0Pp/3Kd -sNCZf14Wm8BhM8uUQTyOUyXQvDUx1JzJyZtXj3zGR86uqEGPPMJj4tWeP/FIcF3v -hpH3s0md010BIjpEVoov6q1qPe32WQL9eGappsiEbJKFpJ4YYXwtSgOnUy6vt0kp -TTuYQWBCUfdhKUmQHvJbFzA6OlIs4RRmWlUcFOFOcS79FzgcWS481LSyMNPNcQNQ -PGmLtxxcoRADs9++BltF+Q8V4MbH5o+ZQt8314Vg9n1AWV6L2poLNY5CxXMd6tVu -wDYHIe/VHyCBqsdh9u7XKpv27xfu4TdxDS1nNzyMm69FUlGvRC5IR2k+IEIKmQ2n -nyRbOKxUBKl6 +DQYDVQQDDAZzZXJ2ZXIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDd +l4bW8vca9/iFDASgmGxyCQvEK5roWwPzE/DXdS+mMJM/fffyqB7Cldm1KW2ILDYU +6JGnlYdxvWLOEa7n4cIBrQ2za7vfDU4LzGJTBN/9C2TYGFP13FTfZi2gZOuipT+M +pkFCfJG1zMWR13yKNxiS6Ixf76c6ADUyt7GmRcuKSctKzM7hUARfeko5iBN8aHpD +nxYa6xSGlHXHDzHHEqEpsk+iQ/eK6o3EeElg7v74sZSzqleIkEfFLiRIPNvuAwEE +IRIQg+k/2S673kdANlyfszXXbUGCYU9lOOPYgAEFgb5ljSXZJBTW3uFlyIONK5I4 +EHgELy9Hs0qnqttXCx8d5I/eI/+Zx9GPF3x8gkMi5wQePiBpqde20kRYDvP2eGeZ +eFTnKYpqCshVAJ4+1Kg96f4S9+GZ1wfzsgS1lA7+9qr3qTvI+XnKhC2h3bqyWS+C 
+BQikgADbURgZT4EzhXvq7fSCFHFTA6xZzXNEVO7DkRrYbvkq06bIl1Ov9vcbCN4I +zOuJJMxlk2Dv3C3mUox3HmcOdO+vvtWnZt3HbFY7ZPCV68ObSf77YD3O5on5RFQu +hk+AinrsDIL6NiiVzALXBL+e8flkqZDzRk1mGphVXGcRP6nn4VtrfN+Bmbm8pu3m +6aYuqSX6vQXb7EHW1tAbvlfbxIlP/Hp5GoV8zqI/tQIDAQABoy8wLTArBgNVHREE +JDAigiBpbnRlZ3JhdGlvbi10ZXN0cy5jbGlja2hvdXNlLmNvbTANBgkqhkiG9w0B +AQsFAAOCAgEANvZ7QDkHIKRq/g4GPkuiU7DN44/TW4bOFe7rDC5S4z5sh/i/Tvur +JYW7m97vLui5PJf6Vbd7xyl5MFIiz2KzoLi26rlvYcI/BT8mIG8jMg7pJjp/wJGa +QdCxdO99a4SIwg7x8pvWChFAOij5e6RhrIvsEB0LN5kKRSnQ0sW5khfsdFbn+3Iy +VwyvvE+nsCqE+KK358EMHicn8FVD3Ze+YzckX0am9DbshL5+eVQ9nOhUV2B8PcbG +SGzqJF07wOBwCdcn3eY+V98SQqrpGC9tCXmv8qErfkq7pkUGWq15d+miF/gaUz+Y +yDPwgi1pephBJ34IhLUUk0IPZJ23uVv/zfB+SpZ9/5pjsmnapR3Zf725jWrhjeT8 +44i5kNeVCvZPzQO9cTOsLXJbWb0vqRzKsvuSvffDQZql4bMMvhPjMibqCiRuSHO/ +yPlWiJjhkZz52DPJX6+LOeP2pFfUe2TR6IqcFPfUs/bV6aD2L/s5UZfZXWS5i5FR +I8uvcKOWL7NBbdY+NVE5aT7DqfhaRurjp61Aym18FgXLHpDHYV9IpkU34+A1MBUi +bzHZRWhxZMRxYezC7jE4zsZ5CQtSq1miDPcDaeK5vMd/Vdys5MIekqCfKUh+Cd5Q +gfC2QgNgodcWRF2plNgA+3E0dUULR3+1s83gGWGC8/UFW+9dtYV4nv8= -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/server-key.pem b/tests/integration/test_ssl_cert_authentication/certs/server-key.pem index 9d4aa59a125..067adf4e1fc 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/server-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/server-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJQQIBADANBgkqhkiG9w0BAQEFAASCCSswggknAgEAAoICAQC+6f15rjbEl5Da -nEWi3YsMIPNwQ08waUrqFz3aCKeGcRujKb7uaX/I5LWdh8x9I++EE2ccFlnJTd2d -ZKxjY9jd2pRXVHmVE7XLpl8qtlLtLjlJ889G3tbgwdLZqaClxJxH0KQHH5wUwweq -lfbteXeYUAIfhDRmoOL2qhUPLYi7E5/vpMeEL+tMn6fswuk7JIR564NLUVC1/VXt -jDli0YiIhE025iWL7FukUAifJKa/kYEAPen71bZcpT3uxYnALClfrANg8uJ+DTDP -eNjZbLjNTPRfqk4W7pwG8w5F6RhrMPwgqvBNrnne7OtCD2lzlFU/sQiBM8HA5gN2 -CJuDjl+F70KTY3nRkAxpmHvSl6RPTEx4egdcb69A85LmDjyV+S018tJQgi8TEmXM -+TadQo3Xz/6+MHBIr7MknRRs0l2wCCiNvkE5MxsT+Pv182wxGArFaIvCU4dZI6bI 
-2WZbYclR41Ud5O6XhQKYu2AhS7zY1+nhAMAvqyD5OmIIjKWi3GHPvdSCljBQzs1t -uHgTaKIeTTzFetnRDzyoRDbLbo/OhKEAybvIEqq5iXUmfz4RG+AmZUdxBZ6muueA -f44mVQBKx/OB1BIzObaKy5s2gP+c486WR15lqVcu6FkPf8tO15c2lfpDXE5IvRBe -Sgi4vVdL4ceJtrfMavd9pa3YmGk+zwIDAQABAoICAFAqklEHmiDYIi/iAg2/KLWp -mn+QDA8gj6AXrVXCX5JBKaFMlMupCksR2r9w+CmuLtPPj/MH4aD5rO78CLVdhoG2 -WKcJJlq2oZaH4JtfXxgQWmcW1XmFdkZ/rSnQJFCnbBZt4orJN7GyKaR0f3E9mb4g -DpwsWBKmSVfZmKk8bhdcSMMI9uyncI9G1W1CdUxr66MEhafZV+JrpCrxQGGh6cql -f4TnhGmqkNrA7rXg3pI/p6Mx3HBuz7o8evKqCUtkX+U4Jl0N8JSMtmvQa4J1OG6g -+1a2fT786BC0/E/X7vSH579R+EEuXCeDZdBiB15MsbVigdc7JAd7roVgXOkTnkE1 -miQeGhP2J4b+OH8fTLS2KfZ9rW/uUFI3kO3duMv6+K14fIzXDZMGduW9f+Tf0gqf -bj5A8Me93fddU5UHgLcJPwqbKXwJjvpnc4c0Ntl5op/2NieakvSJ9l0SnqzGcLx/ -Ufgiz4djaRX/xd1qepOHYkXT0egVec5kJvY5uKgdkWMH5ZlL5viv+7dZNBANAzpl -3K6j5N/ay7ED5cifeUReuPeNw92w5Rvq7OVGWR6lEsexFf+J19fefPo43PQQFtLM -W4T2G/Y0NHkkUsY71CMFN/Oom9wNLq2EGq0apljphf3pAVf5aGNSNwJkLBlmwJDu -NBN2AZQDd99zJ5+LfkotAoIBAQDK2koIG46VTDmhLAOZFNm9wMBALjFExTwo7ds4 -J2GmAphAgjkaxq4KFePc9uzVBx3nkP/4QSWpq/IhJP1usFseSeAR4SCRx2ARvhvO -T+QQ2TTWIfH+LfVM+Vkg38eXJrAF6+UT2EFkvFg0gO6bW6q/SSCxZvltLKn2GX9m -ql3SeFq14r3q05D/sAetdJquAyY4mDZNUEh3rK9ueVNODXSw61GvCABEAJvrNVeD -4iTOmL+51hQaNubuEyxvmmMVv6ougo1MwQmC2MdnwlW5DmUDM4oHjiEZ/1bumEMK -HMUgAYHguwRug28YU4D7MMkKWnhCIa9tEeUKlCDi5eiqtoAzAoIBAQDw7vO61oAd -Aw1lS5UBKEovZfM3aDZKhFsqkhbRBB+FoYtWJyEm6OTZL7oJOVPS/BZFzI/Iy7XG -D+4aeTrK0iQsah1oim4VHp+A6jBOm8VuMJ1vJE/awXXqAkxPnoEaDPJI0DNyVJwx -ah0wH2qAMVi4eXvBHaYemx+KaVxYiXp6veupwWS6ZIQYxKsXbudbs0u8tKEblCGm -KFbeTN8lI2Dm0YwLsrYL4HU88AYYzW1xJAaeN7IV2okOdu8bxPZn33MrQT/Ag58V -FI08yWqxtxQ+2Xf+rpT9KV8GzrCkZ23aSxT3MKjhx4zg3IGEwZYeFmglDgxj4Xeu -WBanAMmddzr1AoIBABqRzPCS63ISsdmyciYy3PJFtOizJDDlxKN3xCbuwZOE83w/ -kks3isQ3ackfbpXYgMo768sQfWZj5ysANVGyN70X4Al3e1Sc0LCCPhIf1LeAO7Nw -bsnkKyUR4+KybOoXXybnZvHaeXZAVS1LVfVzZEH5yhZybmkmWHyrikxgNDnFGdyI -/mcrnupenCLUCw6PywnOpg4qXdOPAsttiMPIb2hc6i4K1j4R9lowem4DyN5mk3Y7 -3BYy6rx0NnXHuwK2QAwnfWp4Pk6tJjEo+yFqCUEKFyI2M4+8Kh7GQsGoUwmGKQTO 
-eh4fiQWeql48XZjFkMrYrqmSlnV0QXp2sqrpjJECggEANomwjEOP3oFZX+ubABcR -q+cFBi7F01pglKNbHNc1F3e45biXzYn8e5lNIFdkvSapGAW1KnvKWtoySLXWdCDb -ZV5j580mHAvBiVn5s2GZcFb63DS5CsiuG1mH0qILkU0K7yaJ7sBuVtUxZDpITlpd -Pezp8Y33k4gDvL4a1EPSgMRK+zM2zOaB7GVgYT6OinhslXvB2E9Qyp7pAwsdm/eF -MNqQO/mpMckOYMvoZWbi9jB0ew4ads3wJmEPwRZ1vI+dL1ZmyvpAYXI0gmUJjM7n -e06Y1gLI4QGjbBQPcjejaz2Bsm7GW81i+2eOvfFgPNFPo8upTFa2U7XG8ui/urBJ -nQKCAQBq2XWZJFgN9GLRWy7Yw+n0s6W5vyD/c8O9VTuTn1DafOVb5KxH+egcxDcj -0T39nyVm6XhchL96yu947bWgCVOiR2Yu5QQ6xw7em46VZdbTtwjKlW/xRYGJKW8v -utcd9C3kR1r92oLbtuWcoBFcCW2MMhamU8H+Wg6Mj5v9+6FhWcomqHaiS1E9ZA4V -qhPCheE6XZGlo6+ar9gIXJcGP2ktkStAWT+0O8stc8xvF5nS/y35ZJpIxrMOsNQS -9+kxj8ouH6D49LYm2ZFtgV5Ink32zynAzFUm4FcYe0fwo6ADzS/PH5eSL1fX3iZP -Hg4+vB3JZpIQ4jvvlZ+GdcG5eW61 +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDdl4bW8vca9/iF +DASgmGxyCQvEK5roWwPzE/DXdS+mMJM/fffyqB7Cldm1KW2ILDYU6JGnlYdxvWLO +Ea7n4cIBrQ2za7vfDU4LzGJTBN/9C2TYGFP13FTfZi2gZOuipT+MpkFCfJG1zMWR +13yKNxiS6Ixf76c6ADUyt7GmRcuKSctKzM7hUARfeko5iBN8aHpDnxYa6xSGlHXH +DzHHEqEpsk+iQ/eK6o3EeElg7v74sZSzqleIkEfFLiRIPNvuAwEEIRIQg+k/2S67 +3kdANlyfszXXbUGCYU9lOOPYgAEFgb5ljSXZJBTW3uFlyIONK5I4EHgELy9Hs0qn +qttXCx8d5I/eI/+Zx9GPF3x8gkMi5wQePiBpqde20kRYDvP2eGeZeFTnKYpqCshV +AJ4+1Kg96f4S9+GZ1wfzsgS1lA7+9qr3qTvI+XnKhC2h3bqyWS+CBQikgADbURgZ +T4EzhXvq7fSCFHFTA6xZzXNEVO7DkRrYbvkq06bIl1Ov9vcbCN4IzOuJJMxlk2Dv +3C3mUox3HmcOdO+vvtWnZt3HbFY7ZPCV68ObSf77YD3O5on5RFQuhk+AinrsDIL6 +NiiVzALXBL+e8flkqZDzRk1mGphVXGcRP6nn4VtrfN+Bmbm8pu3m6aYuqSX6vQXb +7EHW1tAbvlfbxIlP/Hp5GoV8zqI/tQIDAQABAoICAQDaRKlTDRwN+ndXRlFAhyM6 +6GIopvL9MLmhM+EluY5n2q0P+1rCMIusC8LYSahUW4gh7DucoRM7G9s5M/3e9mcN +E5LNSq9RtF9OC9JGCCVBsXlxyfTZ1l/bdWA3/3CDUtZYCmN5xA4az0tErsdDtaWE +/39V+E/2N8Iu5PYd293zp2CRm0+kbBcCnQiDxt+6yYa1GPzDIw+iyJWCsBrOBjGt +SrBaGyy4LvXZsspEquWHvhPFLWLvZ37qYNroNNpFhbv4f0K19dlJRPpdn0L7oxB1 +VicQvdOrQ4LbJ8B2vw9Ch1wt12ySiJHmXMAUa//4jBSJGN++72NY8uf0Y72N7ayF +Bck5QE0we4i7hhuN0WL+IftYD/O5NgOnprjMWifLOhQ8OECZfhOKgbRU+i3aJl8D 
++raQBW7/GM0uL7xIoMcEZSwMs/sQR4loNCJ0UsIeWTdWNrhXrEuzDQGXoWcB8K/r +VVayDO5Qqx8R77HB82/pRdqEWNaNQd7DhPgnWiixISpIGm6zMvT3S0hzEkxu7FNb +uciq9i82BrBkKhg1kiF70FqG13VeMFqTJUuqtoRs1QEQgumvWB47n6FiVwHdDbbH +sUeKZYwbrY22Cn4rrfXH+0KKM9TDR0CiCv+CkSGmG57l5tW7aSUWun46qP8vh7sc +ztzb4LzyUt6XEBIWIqBIQQKCAQEA9+f4TyGo88qzTKaQoko5OAYhvAGr1UGAS6qh +sJpd7rHd9g6DnXyOInpQNglToiJ94mNdIK8f/oOjUXZh2E4CWuxeK291BNiaqCxe +s3LS3XjkdHQIHvqJUw/r4YJ+zfoGznthNbDwDkBob9x3h9rknTbGdLcgaTGi/0PZ +cFnyWDPNPskbcZ3Dxr41oDHiVsOx8n4d4HtspXzh+EPiQiJz5HVfwGNEyzhmFWIa +EzQaxnHL+WF1Pqe1wrzOwZia0Jss8lxbHcTnJupaV5WDvbxY0E4ynofShicv1U76 +B41xDKP/8hFWck9LiMDXk9vrbQIHvGAcsYr5N/jzIrDiEXxvGQKCAQEA5NOegb6m +Ak0mXg+XKhZnSdR1KaWT4/hbVWqYjwxAeAJfMaxjD4MXA8qeEAJo3l3ETkdFCHp/ +lr/BZzNYXnmHU6uvDn2Xq8gPO04ruSV0WWthikXb5gszXdkUH+6fryHn6L0kYJQH +NARQzOgdEcmTP7sy/5GDKzydWbT5qRNOnESUWgnJi9ePjGB9zWxn4jx9AfOYtozh +UmEgofSDGbFlamQic8HGnSJFgOxIZ0AfurPIRSR51gvXx2D5TcsPjLlDrY07IcF3 +DjqfJl0gC1XN5BXdpPvjvNrns+ZK/SRoGlgb8Q4tZLQevox9W110amvMeZj4yMTK +9mgGOSYCzZ6U/QKCAQEA1mBZ4Qwpj1DNRk6PqlfnLSRYTb1gO9UdvdE7a33CFuTn +HZ2lgS2xt+zvqhrcoMuU8o2cfeQTFcP+Gjb2G9gxvzDBqmwC1IL/Esjzx9hWssCV +RoMEds2OrS6Ke4OeZj59XldhU83DeX+HEJylHO1UXwN8EHg/5dfPrVCeGsMdh9qb +9VxxiAm2wAnCU9pvcTpfimQ3L+VrqZvZyRfi8+/ZKkm52KO/XMFTvdAM3mhjcxH7 +Ipd9jQX4bwNZBB8UWaqm7pqhDJg2j/d+0lhwCUZzwwasTV0E14/RlHNsUdWlWhoD +/e+yQr2BgyvIAIvgBW8JA4RVq86S/y0gC/LMO/TQGQKCAQBB2rlaY7DJJsTs+xWp +EiuFrvRNGQ734+j9KyFewcrn/t7An/keZL7B45UbzGW74UZ2tMIkT4TasLMLbVZ4 +UgdlSBqoU/LLiFcB3Vxt+16BwYqfzb0cdorA7pGBIx6nu11PuOd4OAHesYNDhWWg +Ud/jzo89x/X1AovSXmgfhaPxCzeatghgC5iPcNGjxhgbnwbnAeEoYGEUYUmP8pus +UEZ8mPblU5ZCcLOKB/ZKaMT46Xawl2/M7zmZcsos3kzKViMpFmU3MMN/v9U/qDtp +p7cKdlSEf82p82INfzCDq++d7U+VT1w3CDN06V/GZJ31ZrLBKAopVaGHyqZH2i2i +WYpNAoIBACmfr9BoJh1/mbLdd/WpOyORKTbnwstlMgoUcARRJn86iPZuyI4QoSSb +TePZqrWRVmO/K5M65hFjhUpqTWKJGJy5LYIZ4yuIbonJAPNUhjA0bkar9cULBFzy +rb0xmW6sRlBnqhv4aDlOkhHkkR9lB9rTIUW+ankuvVBiGWo4eE8DvZYo30frltku +2K/kqd3NppTl7dN4EnGTo8ROZvr3EMwSu6nE+wUr4G7YuCLdPxwb8gAB8dbmaUsn 
+AXocUh96kYqTwRxo8FO9SqgQYMf81/ovPUfv+7mwO40oygzy/YkGmB1shFIbQuzU +lJvRfdXyyC9DbllQkxWfdvaanLS3r1w= -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/server-req.pem b/tests/integration/test_ssl_cert_authentication/certs/server-req.pem index 714ef19ecf9..bd8e2e1fb7f 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/server-req.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/server-req.pem @@ -1,27 +1,27 @@ -----BEGIN CERTIFICATE REQUEST----- MIIEmzCCAoMCAQAwVjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGc2Vy -dmVyMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAvun9ea42xJeQ2pxF -ot2LDCDzcENPMGlK6hc92ginhnEboym+7ml/yOS1nYfMfSPvhBNnHBZZyU3dnWSs -Y2PY3dqUV1R5lRO1y6ZfKrZS7S45SfPPRt7W4MHS2amgpcScR9CkBx+cFMMHqpX2 -7Xl3mFACH4Q0ZqDi9qoVDy2IuxOf76THhC/rTJ+n7MLpOySEeeuDS1FQtf1V7Yw5 -YtGIiIRNNuYli+xbpFAInySmv5GBAD3p+9W2XKU97sWJwCwpX6wDYPLifg0wz3jY -2Wy4zUz0X6pOFu6cBvMORekYazD8IKrwTa553uzrQg9pc5RVP7EIgTPBwOYDdgib -g45fhe9Ck2N50ZAMaZh70pekT0xMeHoHXG+vQPOS5g48lfktNfLSUIIvExJlzPk2 -nUKN18/+vjBwSK+zJJ0UbNJdsAgojb5BOTMbE/j79fNsMRgKxWiLwlOHWSOmyNlm -W2HJUeNVHeTul4UCmLtgIUu82Nfp4QDAL6sg+TpiCIylotxhz73UgpYwUM7Nbbh4 -E2iiHk08xXrZ0Q88qEQ2y26PzoShAMm7yBKquYl1Jn8+ERvgJmVHcQWeprrngH+O -JlUASsfzgdQSMzm2isubNoD/nOPOlkdeZalXLuhZD3/LTteXNpX6Q1xOSL0QXkoI -uL1XS+HHiba3zGr3faWt2JhpPs8CAwEAAaAAMA0GCSqGSIb3DQEBCwUAA4ICAQAx -Sq/dJHjSVa1g4clFjtdKciFsSnCm/vgzGInyxGL4zTyTf4QXQ0PhfHkiFFCMkFSP -snxavti5HjSCJlUkhB/x4YpqFPQ+/9Uly8RCKKdlMTSiJ30IL/D4dWtmwA83UQAY -ZI6b6dvjdhBNMDb5M9Qzv4+PmF/KMB3KlFTQtDZoAqAnWrtahsVJzsaawK4PPc/e -4IINu2O/aAFnJt+ewwA1NDrkaSlD7Wgu+SAlQRPO+vAKS6Qbs69R/vDdVECJOTmB -FJ9uQlXuhwsR6u5Pl0Df3Jh4K+EXw0nY4LEko3915HnKAQt0F4BTrHjW3Sk2WnMN -AWtp+4D5epRvD5VpL+mwce0PLH6rUb4Ipe9zmApGQr2GAO3XjpfvusvUJPFcWe2b -EfnBxq/Asw1ALqLrT/LKpZHRvNN2YpBLl8ZrzOsNwqVPMDTPUYWf17wLS+FiuCHD -BTdMIoqZ0dmp1ZmENB8h5zM8W+XMlVQlg+LeTVqeEA5Jgr7zuMObQOar0K+MV00K 
-Jqi2ba/v/zFtN31rH+wULfV8BPdtrVTbJMTrCJKLpAwKjsO7wFoFn0Qk7WNEmPmD -+TA65ilk0xfok/04pkh1gd/Kqzh1LIOpG0kmh410U3AJ2jsF3Sop+apH+r+Blota -SsCHnBqnABNRs6gs5FA1pbD4t81pQl5xoXtCCuZbPA== +dmVyMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA3ZeG1vL3Gvf4hQwE +oJhscgkLxCua6FsD8xPw13UvpjCTP3338qgewpXZtSltiCw2FOiRp5WHcb1izhGu +5+HCAa0Ns2u73w1OC8xiUwTf/Qtk2BhT9dxU32YtoGTroqU/jKZBQnyRtczFkdd8 +ijcYkuiMX++nOgA1MrexpkXLiknLSszO4VAEX3pKOYgTfGh6Q58WGusUhpR1xw8x +xxKhKbJPokP3iuqNxHhJYO7++LGUs6pXiJBHxS4kSDzb7gMBBCESEIPpP9kuu95H +QDZcn7M1121BgmFPZTjj2IABBYG+ZY0l2SQU1t7hZciDjSuSOBB4BC8vR7NKp6rb +VwsfHeSP3iP/mcfRjxd8fIJDIucEHj4gaanXttJEWA7z9nhnmXhU5ymKagrIVQCe +PtSoPen+EvfhmdcH87IEtZQO/vaq96k7yPl5yoQtod26slkvggUIpIAA21EYGU+B +M4V76u30ghRxUwOsWc1zRFTuw5Ea2G75KtOmyJdTr/b3GwjeCMzriSTMZZNg79wt +5lKMdx5nDnTvr77Vp2bdx2xWO2TwlevDm0n++2A9zuaJ+URULoZPgIp67AyC+jYo +lcwC1wS/nvH5ZKmQ80ZNZhqYVVxnET+p5+Fba3zfgZm5vKbt5ummLqkl+r0F2+xB +1tbQG75X28SJT/x6eRqFfM6iP7UCAwEAAaAAMA0GCSqGSIb3DQEBCwUAA4ICAQBc +TPShRdB7ZIL4xQNAxWLGoEbshbY/UpJQZdjojxn27roVwEhwP6B1/KgiVKV2X6bE +a36LUnaWYllIMAh4oOHJkIm2gZ3xitdEK1anCf5NJga7TnkwGfD4jTZA91fWCynt +a/64s0KQggKsUVY12TTJVQvOH/l9RMrXOq+jgIh4OURwCBtHTCS6oOp3eF04pEu7 +w54dMAsxp/N9AQaLh14IZUZAQ2v5kdipL99EEQH/G6llU08XrJ4LfGgwDkUjJSsA +TzrX2uk9MLHJVwjpZ99ktjNBs8Gyr4fGOmstT5TXEOO6bVhqZDC6kEL7vrmFSnDZ +g/9lrd4wLUT/STt+E4Qukedi0n/419IpkIAE5C1HOXRnQaOUdcnrLixUB21mBHt/ +n7gkwdY7Cu77dYvBIShzeCnxiJED0+XrBPD5yPokxEjE3MmiIK6meHHCuIwqLjX8 +I78ysv4COeH0svwSjvJInveS9QRCAWBpdvskxPJR8dpoysAff+jiyp3ZvkwcIiUO +Vbsusorpp8pFJXZxvPDBXCy5TOlFnIG/9itjPj98pFRIl5bxzNwDf4wrkwHEpR3i +jpM6y+/RWZn69BpTeAG0vZHhGk3SuXdU56cRzatys4X3biCImgDqeJMUcAoxlrIZ +vgbJVTorwqQmPz5kt28b8ddnUVobbZEPlRITcqjwFg== -----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/wrong-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/wrong-cert.pem index 03ebf989764..b56a10f8a92 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/wrong-cert.pem +++ 
b/tests/integration/test_ssl_cert_authentication/certs/wrong-cert.pem @@ -1,32 +1,32 @@ -----BEGIN CERTIFICATE----- -MIIFjTCCA3WgAwIBAgIUUghXstot43OmqoS1M2rmdSRRX54wDQYJKoZIhvcNAQEL +MIIFjTCCA3WgAwIBAgIUMcX2R8I2H8vNtASHi0EoufIgWEUwDQYJKoZIhvcNAQEL BQAwVjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM -GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGY2xpZW50MB4XDTIy -MDgwODE3MDU1MloXDTMyMDgwNTE3MDU1MlowVjELMAkGA1UEBhMCUlUxEzARBgNV +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGY2xpZW50MB4XDTI0 +MDYyNjEwMjUwNVoXDTM0MDYyNDEwMjUwNVowVjELMAkGA1UEBhMCUlUxEzARBgNV BAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0 ZDEPMA0GA1UEAwwGY2xpZW50MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKC -AgEAjQjHTorL16xTiJFAaxeC0GDP0uCIbT1olT8NmWOePURqg7HpVzBtvffxiug8 -l8fF5kr3mfs9A2XuY9Of/A8uDTm+vdSEjdTwkAox4355oCHDQo3F0GsfJwy5f0vP -t1vKhMmvBHM7cN3aaPBnKJwM3Qd1XWxyEeCv2SDvaxhwMO9Mveal7WUGBKms35Rz -PYEVejM9ccy7nZ8+/wLAx2ixgs5I5uo/RkJkCMVGlKk6BPRLFpiG5tdvrUmFYNdp -yOV2iVEJ1McPKkViUfbVUII4UlvVrsgRNqsWn4ukD5jd7a7AzvfVQq6Mhe7SqDc2 -8c2aVLhoqxYpbKmcYYJGQAqlXgOObs6DEcyitXnK34RkltMgjrl1GyqqqS9hGkp4 -XBne+rw+mbH9jfBdQpi4Xp79l0NVQYahS2iX5HFYRBa4i8SCemMGpVpHrK4L+X8u -qINiLlVXIH7FBTBgz5EjvMsgihdBbEKlFLqUJsPJhRPyBmIewZMGZnsO8PR8av+P -jFp5iBFE1RtIcj4mg1QsjnYxA1QjUtPnqPeHqph7qxtIvjd/j+oXpcyal0xkGsh8 -G+sdYZXCktuocMDmm5ejJs5156znU2yHwN0/hdGJYGdKYWHjSVPT6sa9Q/blJ2IN -/CHSf7ao6FPq4XuMynN5N7K5RbQ22oYuCmqcGpQxtwWRcA0CAwEAAaNTMFEwHQYD -VR0OBBYEFFU4ee1zFXfU+/UiBHqDt7gbKn3aMB8GA1UdIwQYMBaAFFU4ee1zFXfU -+/UiBHqDt7gbKn3aMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggIB -AC3qz446kylKOhNkssTXnjmFrZJhZLD06ijE15rhIgHBP3FXk1RZYcWj0bvERZ2n -zz0IO+Hy68M1jBV0R5YE8SyNVwEec5QiTYhoFoGsBFvlf6zz8oBhSQKtrvpvefdO -8vu+BcvX5C/JcOPrG9y6feTEWa29O7007LM+GRJOe4i2x6xkHGyLWvCSAm9fvHRc -gs6BcRIJLULwqlRaIHHNt8R4pwVaanJ2qQ2Fctgbx9OYkQzQK6hOjo0UUMvykEhW -XW0DT3tYGb9keEG6lPHYcyOcbkans+zJmHxSqLiVPz7tm3pwDtd2OFCi869y60WS -zzQNBhXCYkh782dF+pnOxV6hmdPaLqx3tD/WrT72z1qNK4FEgVIzSy5ULCp3WcgZ 
-ARvMgCMV7CK+rDID2QHuwNmJIArTXX8JRrV/zkUgsdapGpF5zkQ8rNGRJcDKzBww -CiVPOOthSbf+F9jLQ5nzmlFa6rJ7RxHuj1PVwKhxc8u9bPHAgZSt3uGH4QaFh5bw -3aoGucu2yhnkv4aNK6w2nj04K9gYdLwRbJgTNqR1FRCEXzfaJF1szq0dhXQzlhf3 -kbP/Wh+RhDVqt2Eu90LbmbUl+DbIHiYdjc67XFNbppXNn8NpncZKSDH/xn4KvC7z -ORsznqTC1pyQp4SuvaKYFCOWBzMZ60G5T8CTehP1KmKR +AgEApABgxjCrAiDY529xEjboRE/PVwgVStD0dPE2vSOOqxfVYYOqP7VF7pG2PeA7 +Ek9Qqv2cqaDWbDWn5vT3AiN+aslfHtXJgGjPBrpOQ3Me6RSu2aUHPIguabUjz+kJ +VSkoj0pkbAqqIxKse1hZtUNHVwOmg/PthkpGPNxofNX1kCfBWoNJyuKFOoGeNgmY +6joY10zTaiHpq8hhA3b7WY35QdNGD7SwkYBvTjGGzBr/hu26fhX/DnceZ9kf9n6q +899gB2kZH5T1eTlShh12TI1sHa+BGz1YwR0HqM88zXDyAf7bWl7Hy5f8e9keZdx7 +Fx/ws93Bb3GusA5zwUm1iUe1OIwTLFlL+Kdkr0qzofDcQ0ZnNwrME4oAhCJFwSnJ +OWnrCmUou2XXj86Xl481uBana30zzJ9TniHM/hA54ttHsva/yB8tyoXcI4FASwk3 +GdihsOBbRS6KvmeKpEXQpsvBQ9GejSL/UUWuKg+O0ysHE9+QX/+OznFp66h/x7PP +Q7g6ipwAjgwuG5jm/Czz+dw4j3Qp5N5f7Dn3QhDzmXkKgzRzirKh9XVQqUFRwlLn +8VuzAhD5SjRN0JDE2jlt0Hin90zx/nkOV2b5hTYu9NVgmrfCye6uB/qsK7PQBh69 +9i4+8tBGXrcS2Nenm+Hm12fFhNum96A0ahj134H2ks4JcKcCAwEAAaNTMFEwHQYD +VR0OBBYEFIZYdI/00qzj+5JqEzEJfpj93AphMB8GA1UdIwQYMBaAFIZYdI/00qzj ++5JqEzEJfpj93AphMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggIB +AE+9y6Ea3pjIegOeffXzI725vXO0OSuv76vRoswUVQDKL7j7QcvqdcKCK/2hmeMW +MBNwKZneG5iumH2Yp5RQ14arihS9+SYEpgftvfNOZrzZ37RttJhSjBQ7GUorH7pD +uQ4s1unqKXv981JCtlKOQh5ZKthHAMsP/sfYuWg2ksVn1hvFTJZirewVUOgR8zPB +Djl6PdjZLDu1FlglQQ5YUNgsJbAqkBPcA0hEwGU0j5QEncvdspn4LiH2/mHhnzM9 +3QEvsXUxgJo989Am6+E77XNX0wcALj2zUmPPDWYgHgLKO/ZcSAOQ9JaauVhUre2D +7jPwnN47yLak5obVcyCpaDPKYk6sEUZkiWRaONvugoIbjYivmB/BJc0njfVA0kzT +FDwpUTtSddZgHzdTXe0p5C7IGmYkp/vgKlSpSYY6+aCiVApJSdJjL6FZKoOXqDnr +OgoQGSOJif4mDeipKOdrb2JtYwJkRl0c1S+tgOi8PU+ROvZxQGWI9/i20H58M7j0 +r/WhbudhcAqWglk5WOpCodbJhXffCrbUm5NjoFr7AKswxLJVz39WIe/duHPEGV7v +jLd/zj7eJRv5ycDyt91rbGxQ9NKzEx+by/5WIZTi+z+2PG75tdpQUwgEIh1c/XOt +6uXtS0sNnnjHVmXPBC+Myz+1NolYWjZMcBQ2xGIORvm8 -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/wrong-key.pem 
b/tests/integration/test_ssl_cert_authentication/certs/wrong-key.pem index 834f82fe80d..3924eac91c2 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/wrong-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/wrong-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQCNCMdOisvXrFOI -kUBrF4LQYM/S4IhtPWiVPw2ZY549RGqDselXMG299/GK6DyXx8XmSveZ+z0DZe5j -05/8Dy4NOb691ISN1PCQCjHjfnmgIcNCjcXQax8nDLl/S8+3W8qEya8Ecztw3dpo -8GconAzdB3VdbHIR4K/ZIO9rGHAw70y95qXtZQYEqazflHM9gRV6Mz1xzLudnz7/ -AsDHaLGCzkjm6j9GQmQIxUaUqToE9EsWmIbm12+tSYVg12nI5XaJUQnUxw8qRWJR -9tVQgjhSW9WuyBE2qxafi6QPmN3trsDO99VCroyF7tKoNzbxzZpUuGirFilsqZxh -gkZACqVeA45uzoMRzKK1ecrfhGSW0yCOuXUbKqqpL2EaSnhcGd76vD6Zsf2N8F1C -mLhenv2XQ1VBhqFLaJfkcVhEFriLxIJ6YwalWkesrgv5fy6og2IuVVcgfsUFMGDP -kSO8yyCKF0FsQqUUupQmw8mFE/IGYh7BkwZmew7w9Hxq/4+MWnmIEUTVG0hyPiaD -VCyOdjEDVCNS0+eo94eqmHurG0i+N3+P6helzJqXTGQayHwb6x1hlcKS26hwwOab -l6MmznXnrOdTbIfA3T+F0YlgZ0phYeNJU9Pqxr1D9uUnYg38IdJ/tqjoU+rhe4zK -c3k3srlFtDbahi4KapwalDG3BZFwDQIDAQABAoICABhnSHVittreyqV63LarHsuH -tO48Q98DHTGV83hzMFJRoEyhYErh6ltQp87tWG669pdeqXFPc8M+w/0qBSjMR5+q -OkMGntkb23RRD0jZ/ZmjvfGqLmxd3MoY/Y/1Qj/r9iL78vjGJ6bj/ILj9jWkquUk -AT7lIOfPBR6BAGx7kg4lmhXR7ywgquXrpioZuccOqvS2IQ+r6Vrgzzm49DqM9wo3 -P44VQlWZ701FNW1LyupIiyWhxtXyuTQ24IldPMSyT8x65USYHRunXdfo6ghAQBag -JV1U6QWYnRWGSCEaxmv2LQ6i+5fKNC6IoJLS2wMEHIxETHguIYTX5GNK83TpYquc -F2w0NcnO/xMPPFiNFwtX6N9/mexE7dDE+gDvUwWA0DodwHQFh9+IBphznr0OfrJ7 -G6I98tIJj5ecSsTnHyW6BzOxyoQ3KGXfwV8RnDoNtZXiK4boizGlrVrj3G5AzqAf -z4dnIox/zP3kPyhw6670a1PuesQ3aTvR63UqLFOwtm1moZCh5lJmxog32s4mikdn -cQiFFz83zd4hAMRsoJ7W5ABeZyM0V6DXd+lGO00bf3ex3cNFskLeSHLoArtiBt0n -xtWfXU7/h4URFCyD3rJPUruSPT8DPcvu+PpxB4MEnZhvpC+xOXp5LM+jHcSsIgXQ -Bp1wMpZtTN+tkJrtDGbRAoIBAQC/l9Ao+6I/NI1mO6AxMuwyFf4IVpSYH2j/wvrX -v7No+i8egiS5joi3boNFrwCFuNDYm4tu9DQmqOHOYFiJXCemRkeyglgn0tSnnfGH -jFQOlY4kX26lhI8A8EqCUiO3XYnEUsQOd2aJFDos1rr2UdIPFhmLIF7+6QNsFtsV 
-9r6JcNIl+tMlhAZ1LiPYaKsy3GWPptK0Ul3IykBocIf42CNBrbbhMjWeOHNVUzmQ -6EY5CEeGVEGOn8bkSq4mqtzDYVO9rFaX2ySsPtAwoelsxnxLRR+YFrVdIBS4zbJe -zUHMD7g0lDl+gLCTcwsi5hFqUIE0xldRjzkqZgyE5Y6sdaG3AoIBAQC8cfZtIxEr -4vVGl6ZqOsbqoXg1vNL99CQKK8KXBdi79ig4F0ItWgv/RfdZvLAQJ2fVDQcOQs7r -NYINYNJQ2+ASvwDpcjI//M/lCTFY/kNBbUdAYIIrMlNPVoAnfGXCSpxOmw6iTNeW -gvTHamvXR4hkI6XpxI60dmcBUxJExBsW9/Ua5oONZ37nYVL32/PZCba94nMpnJna -6sq+YYWewla0YOZBKgegL/9S4kSt23UaGEfP6tC55puVBEmJZ3ZF57eiNHRWjK5S -oLUSvoXghtNYD8kAE95DIMUfdZudnxcR7o0OZIXlXxdKuWELGj4rXLa52pTuXdUi -4SaoFBIBtqxbAoIBACHtKxImxOs2ThedVVeA8exIkqw9UKDDZx9rEmoj6wwayT84 -wnINOE5rBlnPjGMhf/HrUTx7h8SUwS9/Rg5EwEykQ0vr4tpibf+ka0GaasQOXZY5 -Q+mRWJVM95SuIN72dE/MbCU3pXN+nnKUL5UwlebP/gPx3oB7GH0Kaw8vmzvRO5U4 -Hi2VfGNBoV1P44jlGCLg4ozFzQ+iKNeVna5F77BQp4KnFnGHA31HeHkdeFnfCnNR -9i3gQyLnyoDUXrVGRcLfgpHvX0YU9V63PRNi7l2Rxu9yjfkIlkLHQd+hrxYIbpD4 -8llhRmNFyuIKJWlTCh4jRREPMIs+eNcLo0EMr28CggEAdbmER1325kHdFQK8VfTu -O7owDPIvpbMVqLz5zbs7UQFQ0unEz+xHPmyJQChenVjgxvpihAg8mDhPTOhFu3oB -G/TzLuPGfK8FeYIqr/v4LDGGp7Mi3rWL0i6p1QEj2SMfTwQTOlVjAYZCQSo8wLrR -fC3BAq9mr1qgsvUgnpEck1bIasvDN12GrCni2TExv08QOMP1SfOpMlgn3JO0FCA4 -Zmt6rQ8CwJH7od31Jd/J4kl043FAgEKOw6NwBUT7YGSIFe9LFp7AcKIxPMqFfGT+ -Ny91VHUVDz6zpxmj4+51TSCIQouOEmM7e9UfyP3WBOTgym7BR8Ca3DVoHuya6zMh -rQKCAQEAoy35YYbyHnb5CmVfCIBNk2ZrDERuT8NE6J7u2Y+1/yCmPJyLvA/JzYNm -LifFf9x8+WMKMVaANq5ZitBMr4gMhFQe2KflfSC9rEzfhAoWVqNGjCs79+xNEv3Q -eFv3ss/zU9AKbjepS6+wP6CuILqXVQ1lT/xMtJwc3+YQMyrIYHwUJ13IeGyCJRfY -1/81JcB5pm9sjrgvRccjNV739A8ACm78mlj5DRxasKfTXkIlx/oJUe/EXRGfuZwY -D2FxqP9LLa/gY9DzlUPYpWz3Y/KlbNO+nmakiAzHpPjwwca6FZug9xFW9Ua3SAwY -aH0VTefFJqNq7ADzlJxogNFALN+F0A== +MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQCkAGDGMKsCINjn +b3ESNuhET89XCBVK0PR08Ta9I46rF9Vhg6o/tUXukbY94DsST1Cq/ZypoNZsNafm +9PcCI35qyV8e1cmAaM8Guk5Dcx7pFK7ZpQc8iC5ptSPP6QlVKSiPSmRsCqojEqx7 +WFm1Q0dXA6aD8+2GSkY83Gh81fWQJ8Fag0nK4oU6gZ42CZjqOhjXTNNqIemryGED +dvtZjflB00YPtLCRgG9OMYbMGv+G7bp+Ff8Odx5n2R/2fqrz32AHaRkflPV5OVKG 
+HXZMjWwdr4EbPVjBHQeozzzNcPIB/ttaXsfLl/x72R5l3HsXH/Cz3cFvca6wDnPB +SbWJR7U4jBMsWUv4p2SvSrOh8NxDRmc3CswTigCEIkXBKck5aesKZSi7ZdePzpeX +jzW4FqdrfTPMn1OeIcz+EDni20ey9r/IHy3KhdwjgUBLCTcZ2KGw4FtFLoq+Z4qk +RdCmy8FD0Z6NIv9RRa4qD47TKwcT35Bf/47OcWnrqH/Hs89DuDqKnACODC4bmOb8 +LPP53DiPdCnk3l/sOfdCEPOZeQqDNHOKsqH1dVCpQVHCUufxW7MCEPlKNE3QkMTa +OW3QeKf3TPH+eQ5XZvmFNi701WCat8LJ7q4H+qwrs9AGHr32Lj7y0EZetxLY16eb +4ebXZ8WE26b3oDRqGPXfgfaSzglwpwIDAQABAoICACiTq1399qGtLN1+NjSyfx8/ +u+Ylqtb7AjDY6Zk8bfUpDXN2Fy5yFF5lkPiYPSVXmHbmDtftYoAdenBrVZ4i2All +z3IapSNvSyG4ANsxZYl3w5c3/KVecFVZKwYq+1MlvtJNLrGIpfXNjf1qq69seP8v +eQiW1sLuJ5ixU+znJz3GiFFzwFNBXoNORK3MDBiPzUufx4Mv5tfI2S/5RVEwDmbZ +9jC2nSUy6Nco69geKfDhas39dUDH+i7pir37MyLptqG+wCePPHkE1MU4Duf76a8i +mEf8ErSdESMUO0/9TPNvcihW4Qofjam624mKVq4vCegGyvBe6UDIIp3FNfREWLLC +ilit37ZVOHbq79qV+DobLADYXZGXrptzr6VqVZvvQUEwOjftD2B8GJzxbNxRl77F +BbeOhYA/IDr8YR7Qos0HjqDDekRavHKAa5kcf8yFVJaQycirHJZpjr3oNFktqg6H +9eb537SdPI5nHtgTSQSosxst+iBsjMCJ7rU7aowy9gKG75s9eME06hiQsukNcOI3 +hmBqQBeX+yLWh7Z6A2Y7MepHuYHWKereBGISR58bvTmNyI4mLWYwJZzjON2tot3a +MJM86gw83TuX1Qmp3+NjduQtdtMjDSXLN2yBbK4CufQYaTxK1xdHUoK/uvG9kIq3 +tP+/fiTHZHyuTSSgwOypAoIBAQDT2Vj2uahOypboLv33XtFr2kuDe1sogpLaAVu4 +Dv4vO835gto4fx79rK3u2fBxiVx0yv7kwitvpcwaoMdkUEoSBtOugYB9UezW+SX5 +91bpY0mBH0ToSmosPFVc6PEki6LRV+VGZ1gFXU7uZ4Wia9opENfT7d8cjBQ4NZ/M +sCyqHR2KYB82DHx6Lrtrs0eWn33N8BVgsy4xSbOi2YrgQCnJvfPWVYtcXjRbplj4 +jCVGnPlac0Z/bv3Kb7Q7EOS+d+RFi1ZpsFYPbW5KRhGshzOxGw5d/nCjkEXCV0ht +uK6KndjFOvCGfikZW7WVpw7bkCe0W2Ko/JSX99ccJBDyau1NAoIBAQDGLj/wVxss +dllwswBOAV3oSL9xbUPs+Xbr/FK4XKcL7wcY6cfdZBlsQLQCoAzyTZ8t+g756Hlt +a8qmW2/Wvdo+m63Z2ecnbI9pJsVyYeT8pVumx4biHuXbRBYO/0ZZPtB5kTT6Hzhv +ZHxoUj4jb7L/5kwEdEPFIZX4rVEXY20LJL5wtv2zEQylQk9kunjUgrP1L/GtcNh+ +QRzLXiJWAoC4HWcXmdxb/Hc0BU5geSwZL4bbl3YL3lwWvkd3aY17T90EjWy4uA6G +tsHCxbxauul1q8OkmCcLEfhYnDh95YoVddR97XhC33S0v4dYjX/iS46g8fJ0HhRo +9YGWsD+tRevDAoIBAFCp/5/iTV3C8fbyfa1FI0SH2Bz2SV2Bal0sCzpoKwzdHq6U +znaYoLpCl+/MeCsi/FtUN/3umQ9n9/FjqshdcfavNsbJdJ1DJoUsVPN65FL1hTVv 
+LJOuUgMJ7g70e21I5fQEHb7S9scEIlvQeye/HVBpo2SEvGFoTQKiGHid1EPp1ies +NfYkhvkW9jIqD2Yg0IwrkFhDoaEOySGG58Q/ainw8/l2lRvUmucSzenFoyPh/Wgd +YIiBQI1mPyAGbLLBf9+jEIIprHsvVcFeMLiaumoDPVM44LbG5mj7Rw7QNVV+iN2A +dbkgLJIFQ3z6IUQk/ZlE+qoRkprSuctzSCil4jkCggEAdiYWilNz6NL5yX193gNk +l9nfAGFS0JF8+31nV3AtSqkLAyhEtlE58ta0Oqhub3olPwTILucQlVJg80Kp700q +Mo8fWzRUYaWP7fFmXyXLnW97r3dei6o+ALWbrP81UnlnUkJmYgOA4q/2lz8Iupma +DoOeqD0kNf8q6KFzKc1lsfIK8ym1IC826cMZkAS3ioINhUw6+dq/xq1M3FVXhQ1i +7eDhmClrPQ/LhSDwtAUpbC5waLPodXTwU8LG2oL8DRr0ugUSXyGjz15fL54xB6pN +CpEHRzZKeIgTFci0ySGya87eiuCrBLsxWZyhtQJOznubIYp8sAtKwbQzuMGEhOmd +fwKCAQEAlZoi1SzHstg6PfpwrIHJV3imLa550k9hyAu610CKjMsg6IsFsgu9/J0b +9hkhlafeW+p9zhKSwjl3aCuWUMNE53R+zYmMIJJrBzejC+1H0SKW0Zix9+ghixOX +da1jRaUxUqApJYvvxUC8FbnATM/Eq0ofhGkG3o575SlO+54twJO+bXGAUf/C6xMY +AQUQh90pTbZ96Q3Wdm2Qmrhd/GUaC6k1vAHVHHU8WQHiLmo1fF4gL/TqRv5KEPUM +un6ld7h8BEWtMClhSIiL2h5nvSYGcB6Lai6rPO0UUbGkWBQFpGaeglUmoYi0ciC5 +lMRrRHGUiWHW9C4/siOKYrHBeH5oNQ== -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/configs/users_with_ssl_auth.xml b/tests/integration/test_ssl_cert_authentication/configs/users_with_ssl_auth.xml index c6ac737b632..4bd30163ea6 100644 --- a/tests/integration/test_ssl_cert_authentication/configs/users_with_ssl_auth.xml +++ b/tests/integration/test_ssl_cert_authentication/configs/users_with_ssl_auth.xml @@ -11,6 +11,12 @@ client3 + + + URI:spiffe://foo.com/bar + URI:spiffe://foo.com/baz + + diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index 22d41bb6e14..756a1e1996c 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -338,3 +338,39 @@ def test_create_user(): == 'emma\tssl_certificate\t{"common_names":["client2"]}\n' 'lucy\tssl_certificate\t{"common_names":["client2","client3"]}\n' ) + + +def test_x509_san_support(): + assert ( + execute_query_native( + instance, "SELECT 
currentUser()", user="jerome", cert_name="client4" + ) + == "jerome\n" + ) + assert ( + execute_query_https("SELECT currentUser()", user="jerome", cert_name="client4") + == "jerome\n" + ) + assert ( + instance.query( + "SELECT name, auth_type, auth_params FROM system.users WHERE name='jerome'" + ) + == 'jerome\tssl_certificate\t{"subject_alt_names":["URI:spiffe:\\\\/\\\\/foo.com\\\\/bar","URI:spiffe:\\\\/\\\\/foo.com\\\\/baz"]}\n' + ) + # user `jerome` is configured via xml config, but `show create` should work regardless. + assert ( + instance.query("SHOW CREATE USER jerome") + == "CREATE USER jerome IDENTIFIED WITH ssl_certificate SAN \\'URI:spiffe://foo.com/bar\\', \\'URI:spiffe://foo.com/baz\\'\n" + ) + + instance.query( + "CREATE USER jemma IDENTIFIED WITH ssl_certificate SAN 'URI:spiffe://foo.com/bar', 'URI:spiffe://foo.com/baz'" + ) + assert ( + execute_query_https("SELECT currentUser()", user="jemma", cert_name="client4") + == "jemma\n" + ) + assert ( + instance.query("SHOW CREATE USER jemma") + == "CREATE USER jemma IDENTIFIED WITH ssl_certificate SAN \\'URI:spiffe://foo.com/bar\\', \\'URI:spiffe://foo.com/baz\\'\n" + ) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index f836c58ce30..20b004a7605 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -10,13 +10,10 @@ import threading import time from azure.storage.blob import BlobServiceClient -import helpers.client import pytest from helpers.cluster import ClickHouseCluster, ClickHouseInstance -from helpers.network import PartitionManager -from helpers.mock_servers import start_mock_servers -from helpers.test_tools import exec_query_with_retry from helpers.test_tools import assert_logs_contain_with_retry +from helpers.test_tools import TSV @pytest.fixture(scope="module") @@ -790,6 +787,25 @@ def test_read_subcolumns(cluster): assert res == 
"42\tcont/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" +def test_read_subcolumn_time(cluster): + node = cluster.instances["node"] + storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"] + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumn_time.tsv', " + f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto'," + f" 'a UInt32') select (42)", + ) + + res = node.query( + f"select a, dateDiff('minute', _time, now()) < 59 from azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumn_time.tsv'," + f" 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto'," + f" 'a UInt32')" + ) + + assert res == "42\t1\n" + + def test_read_from_not_existing_container(cluster): node = cluster.instances["node"] query = ( @@ -1217,7 +1233,7 @@ def test_filtering_by_file_or_path(cluster): node.query("SYSTEM FLUSH LOGS") result = node.query( - f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query like '%select%azure%test_filter%' AND type='QueryFinish'" + f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query ilike '%select%azure%test_filter%' AND type='QueryFinish'" ) assert int(result) == 1 @@ -1412,3 +1428,37 @@ def test_respect_object_existence_on_partitioned_write(cluster): ) assert int(result) == 44 + + +def test_insert_create_new_file(cluster): + node = cluster.instances["node"] + storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"] + account_name = "devstoreaccount1" + account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_create_new_file.csv', '{account_name}', '{account_key}', 'a UInt64') 
VALUES (1)", + settings={ + "azure_truncate_on_insert": False, + "azure_create_new_file_on_insert": True, + }, + ) + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_create_new_file.csv', '{account_name}', '{account_key}', 'a UInt64') VALUES (2)", + settings={ + "azure_truncate_on_insert": False, + "azure_create_new_file_on_insert": True, + }, + ) + + res = azure_query( + node, + f"SELECT _file, * FROM azureBlobStorage('{storage_account_url}', 'cont', 'test_create_new_file*', '{account_name}', '{account_key}', 'a UInt64') ORDER BY a", + ) + + assert TSV(res) == TSV( + "test_create_new_file.csv\t1\ntest_create_new_file.1.csv\t2\n" + ) diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 25f0b58e0f5..4cb71895881 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -153,7 +153,7 @@ def test_single_log_file(started_cluster): bucket = started_cluster.minio_bucket TABLE_NAME = "test_single_log_file" - inserted_data = "SELECT number, toString(number + 1) FROM numbers(100)" + inserted_data = "SELECT number as a, toString(number + 1) as b FROM numbers(100)" parquet_data_path = create_initial_data_file( started_cluster, instance, inserted_data, TABLE_NAME ) @@ -511,3 +511,104 @@ def test_restart_broken_table_function(started_cluster): upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + + +def test_partition_columns(started_cluster): + instance = started_cluster.instances["node1"] + spark = started_cluster.spark_session + minio_client = started_cluster.minio_client + bucket = started_cluster.minio_bucket + TABLE_NAME = "test_partition_columns" + result_file = f"{TABLE_NAME}" + partition_columns = ["b", "c", "d", "e"] + + delta_table = ( + DeltaTable.create(spark) + .tableName(TABLE_NAME) + .location(f"/{result_file}") + 
.addColumn("a", "INT") + .addColumn("b", "STRING") + .addColumn("c", "DATE") + .addColumn("d", "INT") + .addColumn("e", "BOOLEAN") + .partitionedBy(partition_columns) + .execute() + ) + num_rows = 9 + + schema = StructType( + [ + StructField("a", IntegerType()), + StructField("b", StringType()), + StructField("c", DateType()), + StructField("d", IntegerType()), + StructField("e", BooleanType()), + ] + ) + + for i in range(1, num_rows + 1): + data = [ + ( + i, + "test" + str(i), + datetime.strptime(f"2000-01-0{i}", "%Y-%m-%d"), + i, + False, + ) + ] + df = spark.createDataFrame(data=data, schema=schema) + df.printSchema() + df.write.mode("append").format("delta").partitionBy(partition_columns).save( + f"/{TABLE_NAME}" + ) + + minio_client = started_cluster.minio_client + bucket = started_cluster.minio_bucket + + files = upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + assert len(files) > 0 + print(f"Uploaded files: {files}") + + result = instance.query( + f"describe table deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')" + ).strip() + + assert ( + result + == "a\tNullable(Int32)\t\t\t\t\t\nb\tNullable(String)\t\t\t\t\t\nc\tNullable(Date32)\t\t\t\t\t\nd\tNullable(Int32)\t\t\t\t\t\ne\tNullable(Bool)" + ) + + result = int( + instance.query( + f"""SELECT count() + FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') + """ + ) + ) + assert result == num_rows + result = int( + instance.query( + f"""SELECT count() + FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') + WHERE c == toDateTime('2000/01/05') + """ + ) + ) + assert result == 1 + + # instance.query( + # f""" + # DROP TABLE IF EXISTS {TABLE_NAME}; + # CREATE TABLE {TABLE_NAME} (a Int32, b String, c DateTime) + # 
ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" + # ) + # assert ( + # int( + # instance.query( + # f"SELECT count() FROM {TABLE_NAME} WHERE c != toDateTime('2000/01/05')" + # ) + # ) + # == num_rows - 1 + # ) + # instance.query(f"SELECT a, b, c, FROM {TABLE_NAME}") + # assert False diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 44c0223e677..47d8f44c0b7 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -999,6 +999,20 @@ def test_read_subcolumns(started_cluster): assert res == "42\ttest_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" +def test_read_subcolumn_time(started_cluster): + node = started_cluster.instances["node1"] + + node.query( + f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumn_time.tsv', auto, 'a UInt32') select (42)" + ) + + res = node.query( + f"select a, dateDiff('minute', _time, now()) < 59 from hdfs('hdfs://hdfs1:9000/test_subcolumn_time.tsv', auto, 'a UInt32')" + ) + + assert res == "42\t1\n" + + def test_union_schema_inference_mode(started_cluster): node = started_cluster.instances["node1"] diff --git a/tests/integration/test_storage_hudi/test.py b/tests/integration/test_storage_hudi/test.py index 6fe7a193129..0c3fbfb3cda 100644 --- a/tests/integration/test_storage_hudi/test.py +++ b/tests/integration/test_storage_hudi/test.py @@ -4,7 +4,7 @@ import os import json import helpers.client -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, ClickHouseInstance from helpers.test_tools import TSV from helpers.s3_tools import prepare_s3_bucket, upload_directory, get_file_contents diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py index d9dee0541b0..7762d17b96f 100644 --- a/tests/integration/test_storage_iceberg/test.py +++ 
b/tests/integration/test_storage_iceberg/test.py @@ -1,5 +1,5 @@ import helpers.client -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, ClickHouseInstance from helpers.test_tools import TSV import pyspark diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 23a95d5dd71..3240039ee81 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -702,7 +702,7 @@ def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): assert ( int(result1) == messages_num * threads_num - ), "ClickHouse lost some messages: {}".format(result) + ), "ClickHouse lost some messages: {}".format(result1) assert int(result2) == 10 @@ -1516,7 +1516,7 @@ def test_rabbitmq_hash_exchange(rabbitmq_cluster): assert ( int(result1) == messages_num * threads_num - ), "ClickHouse lost some messages: {}".format(result) + ), "ClickHouse lost some messages: {}".format(result1) assert int(result2) == 4 * num_tables @@ -1966,7 +1966,7 @@ def test_rabbitmq_many_consumers_to_each_queue(rabbitmq_cluster): assert ( int(result1) == messages_num * threads_num - ), "ClickHouse lost some messages: {}".format(result) + ), "ClickHouse lost some messages: {}".format(result1) # 4 tables, 2 consumers for each table => 8 consumer tags assert int(result2) == 8 @@ -2427,9 +2427,7 @@ def test_rabbitmq_drop_table_properly(rabbitmq_cluster): time.sleep(30) try: - exists = channel.queue_declare( - callback, queue="rabbit_queue_drop", passive=True - ) + exists = channel.queue_declare(queue="rabbit_queue_drop", passive=True) except Exception as e: exists = False @@ -3364,7 +3362,7 @@ def test_rabbitmq_flush_by_block_size(rabbitmq_cluster): routing_key="", body=json.dumps({"key": 0, "value": 0}), ) - except e: + except Exception as e: logging.debug(f"Got error: {str(e)}") produce_thread = threading.Thread(target=produce) @@ -3442,7 +3440,7 @@ def 
test_rabbitmq_flush_by_time(rabbitmq_cluster): ) logging.debug("Produced a message") time.sleep(0.8) - except e: + except Exception as e: logging.debug(f"Got error: {str(e)}") produce_thread = threading.Thread(target=produce) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 09b27fff1e8..9a0cb352088 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1133,6 +1133,7 @@ def test_seekable_formats(started_cluster): exec_query_with_retry( instance, f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1000000) settings s3_truncate_on_insert=1", + timeout=100, ) result = instance.query(f"SELECT count() FROM {table_function}") @@ -1142,6 +1143,7 @@ def test_seekable_formats(started_cluster): exec_query_with_retry( instance, f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1500000) settings s3_truncate_on_insert=1", + timeout=100, ) result = instance.query( @@ -1169,6 +1171,7 @@ def test_seekable_formats_url(started_cluster): exec_query_with_retry( instance, f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1500000) settings s3_truncate_on_insert=1", + timeout=100, ) result = instance.query(f"SELECT count() FROM {table_function}") @@ -1178,6 +1181,7 @@ def test_seekable_formats_url(started_cluster): exec_query_with_retry( instance, f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1500000) settings s3_truncate_on_insert=1", + timeout=100, ) table_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_parquet', 'Parquet', 'a Int32, b String')" @@ -2149,6 +2153,21 @@ def test_read_subcolumns(started_cluster): ) +def test_read_subcolumn_time(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + + instance.query( 
+ f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumn_time.tsv', auto, 'a UInt32') select (42)" + ) + + res = instance.query( + f"select a, dateDiff('minute', _time, now()) < 59 from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumn_time.tsv', auto, 'a UInt32')" + ) + + assert res == "42\t1\n" + + def test_filtering_by_file_or_path(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] @@ -2177,6 +2196,12 @@ def test_filtering_by_file_or_path(started_cluster): assert int(result) == 1 + assert 0 == int( + instance.query( + f"select count() from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_filter*.tsv') where _file = 'kek'" + ) + ) + def test_union_schema_inference_mode(started_cluster): bucket = started_cluster.minio_bucket diff --git a/tests/integration/test_storage_s3_queue/configs/merge_tree.xml b/tests/integration/test_storage_s3_queue/configs/merge_tree.xml new file mode 100644 index 00000000000..61eba8face7 --- /dev/null +++ b/tests/integration/test_storage_s3_queue/configs/merge_tree.xml @@ -0,0 +1,5 @@ + + + 0 + + diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index 66631c51b03..b93e560d5b9 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -12,6 +12,7 @@ import json AVAILABLE_MODES = ["unordered", "ordered"] DEFAULT_AUTH = ["'minio'", "'minio123'"] NO_AUTH = ["NOSIGN"] +AZURE_CONTAINER_NAME = "cont" def prepare_public_s3_bucket(started_cluster): @@ -84,6 +85,7 @@ def started_cluster(): "instance", user_configs=["configs/users.xml"], with_minio=True, + with_azurite=True, with_zookeeper=True, main_configs=[ "configs/zookeeper.xml", @@ -110,11 +112,27 @@ def started_cluster(): with_installed_binary=True, use_old_analyzer=True, ) + 
cluster.add_instance( + "instance_too_many_parts", + user_configs=["configs/users.xml"], + with_minio=True, + with_zookeeper=True, + main_configs=[ + "configs/s3queue_log.xml", + "configs/merge_tree.xml", + ], + stay_alive=True, + ) logging.info("Starting cluster...") cluster.start() logging.info("Cluster started") + container_client = cluster.blob_service_client.get_container_client( + AZURE_CONTAINER_NAME + ) + container_client.create_container() + yield cluster finally: cluster.shutdown() @@ -134,6 +152,7 @@ def generate_random_files( started_cluster, files_path, count, + storage="s3", column_num=3, row_num=10, start_ind=0, @@ -155,7 +174,10 @@ def generate_random_files( values_csv = ( "\n".join((",".join(map(str, row)) for row in rand_values)) + "\n" ).encode() - put_s3_file_content(started_cluster, filename, values_csv, bucket) + if storage == "s3": + put_s3_file_content(started_cluster, filename, values_csv, bucket) + else: + put_azure_file_content(started_cluster, filename, values_csv, bucket) return total_values @@ -165,12 +187,21 @@ def put_s3_file_content(started_cluster, filename, data, bucket=None): started_cluster.minio_client.put_object(bucket, filename, buf, len(data)) +def put_azure_file_content(started_cluster, filename, data, bucket=None): + client = started_cluster.blob_service_client.get_blob_client( + AZURE_CONTAINER_NAME, filename + ) + buf = io.BytesIO(data) + client.upload_blob(buf, "BlockBlob", len(data)) + + def create_table( started_cluster, node, table_name, mode, files_path, + engine_name="S3Queue", format="column1 UInt32, column2 UInt32, column3 UInt32", additional_settings={}, file_format="CSV", @@ -189,11 +220,17 @@ def create_table( } settings.update(additional_settings) - url = f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{files_path}/" + engine_def = None + if engine_name == "S3Queue": + url = f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{files_path}/" + engine_def = 
f"{engine_name}('{url}', {auth_params}, {file_format})" + else: + engine_def = f"{engine_name}('{started_cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', '{files_path}/', 'CSV')" + node.query(f"DROP TABLE IF EXISTS {table_name}") create_query = f""" CREATE TABLE {table_name} ({format}) - ENGINE = S3Queue('{url}', {auth_params}, {file_format}) + ENGINE = {engine_def} SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))} """ @@ -224,17 +261,22 @@ def create_mv( ) -@pytest.mark.parametrize("mode", AVAILABLE_MODES) -def test_delete_after_processing(started_cluster, mode): +@pytest.mark.parametrize("mode", ["unordered", "ordered"]) +@pytest.mark.parametrize("engine_name", ["S3Queue", "AzureQueue"]) +def test_delete_after_processing(started_cluster, mode, engine_name): node = started_cluster.instances["instance"] - table_name = f"test.delete_after_processing_{mode}" + table_name = f"test.delete_after_processing_{mode}_{engine_name}" dst_table_name = f"{table_name}_dst" files_path = f"{table_name}_data" files_num = 5 row_num = 10 + if engine_name == "S3Queue": + storage = "s3" + else: + storage = "azure" total_values = generate_random_files( - started_cluster, files_path, files_num, row_num=row_num + started_cluster, files_path, files_num, row_num=row_num, storage=storage ) create_table( started_cluster, @@ -243,6 +285,7 @@ def test_delete_after_processing(started_cluster, mode): mode, files_path, additional_settings={"after_processing": "delete"}, + engine_name=engine_name, ) create_mv(node, table_name, dst_table_name) @@ -263,15 +306,24 @@ def test_delete_after_processing(started_cluster, mode): ).splitlines() ] == sorted(total_values, key=lambda x: (x[0], x[1], x[2])) - minio = started_cluster.minio_client - objects = list(minio.list_objects(started_cluster.minio_bucket, recursive=True)) - assert len(objects) == 0 + if engine_name == "S3Queue": + minio = started_cluster.minio_client + objects = 
list(minio.list_objects(started_cluster.minio_bucket, recursive=True)) + assert len(objects) == 0 + else: + client = started_cluster.blob_service_client.get_container_client( + AZURE_CONTAINER_NAME + ) + objects_iterator = client.list_blobs(files_path) + for objects in objects_iterator: + assert False -@pytest.mark.parametrize("mode", AVAILABLE_MODES) -def test_failed_retry(started_cluster, mode): +@pytest.mark.parametrize("mode", ["unordered", "ordered"]) +@pytest.mark.parametrize("engine_name", ["S3Queue", "AzureQueue"]) +def test_failed_retry(started_cluster, mode, engine_name): node = started_cluster.instances["instance"] - table_name = f"test.failed_retry_{mode}" + table_name = f"test.failed_retry_{mode}_{engine_name}" dst_table_name = f"{table_name}_dst" files_path = f"{table_name}_data" file_path = f"{files_path}/trash_test.csv" @@ -284,7 +336,10 @@ def test_failed_retry(started_cluster, mode): values_csv = ( "\n".join((",".join(map(str, row)) for row in values)) + "\n" ).encode() - put_s3_file_content(started_cluster, file_path, values_csv) + if engine_name == "S3Queue": + put_s3_file_content(started_cluster, file_path, values_csv) + else: + put_azure_file_content(started_cluster, file_path, values_csv) create_table( started_cluster, @@ -296,6 +351,7 @@ def test_failed_retry(started_cluster, mode): "s3queue_loading_retries": retries_num, "keeper_path": keeper_path, }, + engine_name=engine_name, ) create_mv(node, table_name, dst_table_name) @@ -352,6 +408,7 @@ def test_direct_select_file(started_cluster, mode): files_path, additional_settings={ "keeper_path": keeper_path, + "s3queue_processing_threads_num": 1, }, ) @@ -379,6 +436,7 @@ def test_direct_select_file(started_cluster, mode): files_path, additional_settings={ "keeper_path": keeper_path, + "s3queue_processing_threads_num": 1, }, ) @@ -397,6 +455,7 @@ def test_direct_select_file(started_cluster, mode): files_path, additional_settings={ "keeper_path": keeper_path, + "s3queue_processing_threads_num": 
1, }, ) @@ -778,10 +837,12 @@ def test_max_set_age(started_cluster): files_path, additional_settings={ "keeper_path": keeper_path, - "s3queue_tracked_file_ttl_sec": max_age, - "s3queue_cleanup_interval_min_ms": 0, - "s3queue_cleanup_interval_max_ms": 0, - "s3queue_loading_retries": 0, + "tracked_file_ttl_sec": max_age, + "cleanup_interval_min_ms": max_age / 3, + "cleanup_interval_max_ms": max_age / 3, + "loading_retries": 0, + "processing_threads_num": 1, + "loading_retries": 0, }, ) create_mv(node, table_name, dst_table_name) @@ -806,7 +867,7 @@ def test_max_set_age(started_cluster): assert expected_rows == get_count() assert 10 == int(node.query(f"SELECT uniq(_path) from {dst_table_name}")) - time.sleep(max_age + 1) + time.sleep(max_age + 5) expected_rows = 20 @@ -830,7 +891,7 @@ def test_max_set_age(started_cluster): failed_count = int( node.query( - "SELECT value FROM system.events WHERE name = 'S3QueueFailedFiles' SETTINGS system_events_show_zero_values=1" + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" ) ) @@ -845,7 +906,7 @@ def test_max_set_age(started_cluster): for _ in range(30): if failed_count + 1 == int( node.query( - "SELECT value FROM system.events WHERE name = 'S3QueueFailedFiles' SETTINGS system_events_show_zero_values=1" + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" ) ): break @@ -853,7 +914,7 @@ def test_max_set_age(started_cluster): assert failed_count + 1 == int( node.query( - "SELECT value FROM system.events WHERE name = 'S3QueueFailedFiles' SETTINGS system_events_show_zero_values=1" + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" ) ) @@ -861,6 +922,11 @@ def test_max_set_age(started_cluster): assert "Cannot parse input" in node.query( "SELECT exception FROM system.s3queue WHERE file_name ilike '%fff.csv'" ) + assert 1 
== int( + node.query( + "SELECT count() FROM system.s3queue_log WHERE file_name ilike '%fff.csv'" + ) + ) assert 1 == int( node.query( "SELECT count() FROM system.s3queue_log WHERE file_name ilike '%fff.csv' AND notEmpty(exception)" @@ -870,14 +936,16 @@ def test_max_set_age(started_cluster): time.sleep(max_age + 1) assert failed_count + 2 == int( - node.query("SELECT value FROM system.events WHERE name = 'S3QueueFailedFiles'") + node.query( + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles'" + ) ) node.query("SYSTEM FLUSH LOGS") assert "Cannot parse input" in node.query( "SELECT exception FROM system.s3queue WHERE file_name ilike '%fff.csv' ORDER BY processing_end_time DESC LIMIT 1" ) - assert 2 == int( + assert 1 < int( node.query( "SELECT count() FROM system.s3queue_log WHERE file_name ilike '%fff.csv' AND notEmpty(exception)" ) @@ -1284,7 +1352,7 @@ def test_shards_distributed(started_cluster, mode, processing_threads): def get_count(node, table_name): return int(run_query(node, f"SELECT count() FROM {table_name}")) - for _ in range(10): + for _ in range(30): if ( get_count(node, dst_table_name) + get_count(node_2, dst_table_name) ) == total_rows: @@ -1414,7 +1482,7 @@ def test_settings_check(started_cluster): ) assert ( - "Existing table metadata in ZooKeeper differs in s3queue_buckets setting. Stored in ZooKeeper: 2, local: 3" + "Existing table metadata in ZooKeeper differs in buckets setting. 
Stored in ZooKeeper: 2, local: 3" in create_table( started_cluster, node_2, @@ -1577,3 +1645,156 @@ def test_upgrade(started_cluster): node.restart_with_latest_version() assert expected_rows == get_count() + + +def test_exception_during_insert(started_cluster): + node = started_cluster.instances["instance_too_many_parts"] + + table_name = f"test_exception_during_insert" + dst_table_name = f"{table_name}_dst" + keeper_path = f"/clickhouse/test_{table_name}" + files_path = f"{table_name}_data" + files_to_generate = 10 + + create_table( + started_cluster, + node, + table_name, + "unordered", + files_path, + additional_settings={ + "keeper_path": keeper_path, + }, + ) + total_values = generate_random_files( + started_cluster, files_path, files_to_generate, start_ind=0, row_num=1 + ) + + create_mv(node, table_name, dst_table_name) + + node.wait_for_log_line( + "Failed to process data: Code: 252. DB::Exception: Too many parts" + ) + + time.sleep(2) + exception = node.query( + f"SELECT exception FROM system.s3queue WHERE zookeeper_path ilike '%{table_name}%' and notEmpty(exception)" + ) + assert "Too many parts" in exception + + node.replace_in_config( + "/etc/clickhouse-server/config.d/merge_tree.xml", + "parts_to_throw_insert>0", + "parts_to_throw_insert>10", + ) + node.restart_clickhouse() + + def get_count(): + return int(node.query(f"SELECT count() FROM {dst_table_name}")) + + expected_rows = 10 + for _ in range(20): + if expected_rows == get_count(): + break + time.sleep(1) + assert expected_rows == get_count() + + +def test_commit_on_limit(started_cluster): + node = started_cluster.instances["instance"] + + table_name = f"test_commit_on_limit" + dst_table_name = f"{table_name}_dst" + keeper_path = f"/clickhouse/test_{table_name}" + files_path = f"{table_name}_data" + files_to_generate = 10 + + create_table( + started_cluster, + node, + table_name, + "ordered", + files_path, + additional_settings={ + "keeper_path": keeper_path, + "s3queue_processing_threads_num": 1, 
+ "s3queue_loading_retries": 0, + "s3queue_max_processed_files_before_commit": 10, + }, + ) + total_values = generate_random_files( + started_cluster, files_path, files_to_generate, start_ind=0, row_num=1 + ) + + incorrect_values = [ + ["failed", 1, 1], + ] + incorrect_values_csv = ( + "\n".join((",".join(map(str, row)) for row in incorrect_values)) + "\n" + ).encode() + + correct_values = [ + [1, 1, 1], + ] + correct_values_csv = ( + "\n".join((",".join(map(str, row)) for row in correct_values)) + "\n" + ).encode() + + put_s3_file_content( + started_cluster, f"{files_path}/test_99.csv", correct_values_csv + ) + put_s3_file_content( + started_cluster, f"{files_path}/test_999.csv", correct_values_csv + ) + put_s3_file_content( + started_cluster, f"{files_path}/test_9999.csv", incorrect_values_csv + ) + put_s3_file_content( + started_cluster, f"{files_path}/test_99999.csv", correct_values_csv + ) + put_s3_file_content( + started_cluster, f"{files_path}/test_999999.csv", correct_values_csv + ) + + create_mv(node, table_name, dst_table_name) + + def get_processed_files(): + return ( + node.query( + f"SELECT file_name FROM system.s3queue WHERE zookeeper_path ilike '%{table_name}%' and status = 'Processed' and rows_processed > 0 " + ) + .strip() + .split("\n") + ) + + def get_failed_files(): + return ( + node.query( + f"SELECT file_name FROM system.s3queue WHERE zookeeper_path ilike '%{table_name}%' and status = 'Failed'" + ) + .strip() + .split("\n") + ) + + for _ in range(30): + if "test_999999.csv" in get_processed_files(): + break + time.sleep(1) + assert "test_999999.csv" in get_processed_files() + + assert 1 == int( + node.query( + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" + ) + ) + + expected_processed = ["test_" + str(i) + ".csv" for i in range(files_to_generate)] + processed = get_processed_files() + for value in expected_processed: + assert value in processed + + expected_failed = 
["test_9999.csv"] + failed = get_failed_files() + for value in expected_failed: + assert value not in processed + assert value in failed diff --git a/tests/integration/test_system_flush_logs/test.py b/tests/integration/test_system_flush_logs/test.py index 084d342d736..2022f9d4a89 100644 --- a/tests/integration/test_system_flush_logs/test.py +++ b/tests/integration/test_system_flush_logs/test.py @@ -2,7 +2,6 @@ # pylint: disable=unused-argument # pylint: disable=redefined-outer-name -import time import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry @@ -22,6 +21,7 @@ system_logs = [ ("system.part_log", 1), ("system.trace_log", 1), ("system.metric_log", 1), + ("system.error_log", 1), ] diff --git a/tests/integration/test_system_logs_recreate/test.py b/tests/integration/test_system_logs_recreate/test.py index 2e8a0e4e877..1bdb1fe3261 100644 --- a/tests/integration/test_system_logs_recreate/test.py +++ b/tests/integration/test_system_logs_recreate/test.py @@ -30,6 +30,7 @@ def test_system_logs_recreate(): "part_log", "trace_log", "metric_log", + "error_log", ] node.query("SYSTEM FLUSH LOGS") diff --git a/tests/integration/test_table_db_num_limit/__init__.py b/tests/integration/test_table_db_num_limit/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_table_db_num_limit/config/config.xml b/tests/integration/test_table_db_num_limit/config/config.xml new file mode 100644 index 00000000000..9a573b158fe --- /dev/null +++ b/tests/integration/test_table_db_num_limit/config/config.xml @@ -0,0 +1,5 @@ + + 10 + 10 + + diff --git a/tests/integration/test_table_db_num_limit/test.py b/tests/integration/test_table_db_num_limit/test.py new file mode 100644 index 00000000000..aa8030b077c --- /dev/null +++ b/tests/integration/test_table_db_num_limit/test.py @@ -0,0 +1,43 @@ +import pytest +from helpers.client import QueryRuntimeException +from helpers.cluster import 
ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", main_configs=["config/config.xml"], with_zookeeper=True +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def test_table_db_limit(started_cluster): + for i in range(10): + node1.query("create database db{}".format(i)) + + with pytest.raises(QueryRuntimeException) as exp_info: + node1.query("create database db_exp".format(i)) + + assert "TOO_MANY_DATABASES" in str(exp_info) + + for i in range(10): + node1.query("create table t{} (a Int32) Engine = Log".format(i)) + + node1.query("system flush logs") + for i in range(10): + node1.query("drop table t{}".format(i)) + for i in range(10): + node1.query("create table t{} (a Int32) Engine = Log".format(i)) + + with pytest.raises(QueryRuntimeException) as exp_info: + node1.query("create table default.tx (a Int32) Engine = Log") + assert "TOO_MANY_TABLES" in str(exp_info) diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index 94432b89ab6..3b79ea7916d 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -1850,7 +1850,7 @@ class TestCancelBackgroundMoving: config = inspect.cleandoc( f""" - { 256 * 1024 } + {256 * 1024} """ ) diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index f944adbea41..538322473ee 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -325,7 +325,7 @@ def optimize_with_retry(node, table_name, retry=20): settings={"optimize_throw_if_noop": "1"}, ) break - except e: + except: time.sleep(0.5) diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/__init__.py b/tests/integration/test_unknown_column_dist_table_with_alias/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff 
--git a/tests/integration/test_unknown_column_dist_table_with_alias/configs/clusters.xml b/tests/integration/test_unknown_column_dist_table_with_alias/configs/clusters.xml new file mode 100644 index 00000000000..754d765f23f --- /dev/null +++ b/tests/integration/test_unknown_column_dist_table_with_alias/configs/clusters.xml @@ -0,0 +1,12 @@ + + + + + + localhost + 9000 + + + + + diff --git a/tests/integration/test_unknown_column_dist_table_with_alias/test.py b/tests/integration/test_unknown_column_dist_table_with_alias/test.py new file mode 100644 index 00000000000..0d3890f3e09 --- /dev/null +++ b/tests/integration/test_unknown_column_dist_table_with_alias/test.py @@ -0,0 +1,35 @@ +import pytest +from helpers.cluster import ClickHouseCluster +import logging + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", main_configs=["configs/clusters.xml"]) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +@pytest.mark.parametrize("prefer_localhost_replica", [0, 1]) +def test_distributed_table_with_alias(start_cluster, prefer_localhost_replica): + node.query( + """ + DROP TABLE IF EXISTS local; + DROP TABLE IF EXISTS dist; + CREATE TABLE local(`dummy` UInt8) ENGINE = MergeTree ORDER BY tuple(); + CREATE TABLE dist AS local ENGINE = Distributed(localhost_cluster, currentDatabase(), local); + """ + ) + + node.query( + "WITH 'Hello' AS `alias` SELECT `alias` FROM dist GROUP BY `alias`;", + settings={"prefer_localhost_replica": prefer_localhost_replica}, + ) diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing2.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing2.xml new file mode 100644 index 00000000000..fd416cad505 --- /dev/null +++ 
b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing2.xml @@ -0,0 +1,35 @@ + + + + random + + 1 + + + 0 + 1 + + + + zoo1 + 2181 + az1 + + + zoo2 + 2181 + az2 + + + zoo3 + 2181 + az3 + + 3000 + + + + 0 + az2 + + diff --git a/tests/integration/test_zookeeper_config_load_balancing/test.py b/tests/integration/test_zookeeper_config_load_balancing/test.py index f17e0c3f03f..9cdf7db2b08 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test.py @@ -1,6 +1,8 @@ +import time import pytest from helpers.cluster import ClickHouseCluster from helpers.network import PartitionManager +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster( __file__, zookeeper_config_path="configs/zookeeper_load_balancing.xml" @@ -17,6 +19,10 @@ node3 = cluster.add_instance( "nod3", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"] ) +node4 = cluster.add_instance( + "nod4", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing2.xml"] +) + def change_balancing(old, new, reload=True): line = "{}<" @@ -405,113 +411,57 @@ def test_hostname_levenshtein_distance(started_cluster): def test_round_robin(started_cluster): pm = PartitionManager() try: - pm._add_rule( - { - "source": node1.ip_address, - "destination": cluster.get_instance_ip("zoo1"), - "action": "REJECT --reject-with tcp-reset", - } - ) - pm._add_rule( - { - "source": node2.ip_address, - "destination": cluster.get_instance_ip("zoo1"), - "action": "REJECT --reject-with tcp-reset", - } - ) - pm._add_rule( - { - "source": node3.ip_address, - "destination": cluster.get_instance_ip("zoo1"), - "action": "REJECT --reject-with tcp-reset", - } - ) change_balancing("random", "round_robin") - - print( - str( - node1.exec_in_container( - [ - "bash", - "-c", - "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", - ], - privileged=True, - user="root", - ) + for 
node in [node1, node2, node3]: + idx = int( + node.query("select index from system.zookeeper_connection").strip() ) - ) - assert ( - "1" - == str( - node1.exec_in_container( - [ - "bash", - "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", - ], - privileged=True, - user="root", - ) - ).strip() - ) + new_idx = (idx + 1) % 3 - print( - str( - node2.exec_in_container( - [ - "bash", - "-c", - "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", - ], - privileged=True, - user="root", - ) + pm._add_rule( + { + "source": node.ip_address, + "destination": cluster.get_instance_ip("zoo" + str(idx + 1)), + "action": "REJECT --reject-with tcp-reset", + } ) - ) - assert ( - "1" - == str( - node2.exec_in_container( - [ - "bash", - "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", - ], - privileged=True, - user="root", - ) - ).strip() - ) - print( - str( - node3.exec_in_container( - [ - "bash", - "-c", - "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", - ], - privileged=True, - user="root", - ) + assert_eq_with_retry( + node, + "select index from system.zookeeper_connection", + str(new_idx) + "\n", ) - ) - assert ( - "1" - == str( - node3.exec_in_container( - [ - "bash", - "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", - ], - privileged=True, - user="root", - ) - ).strip() - ) - + pm.heal_all() finally: pm.heal_all() change_balancing("round_robin", "random", reload=False) + + +def test_az(started_cluster): + pm = PartitionManager() + try: + # make sure it disconnects from the optimal node + pm._add_rule( + { + "source": node4.ip_address, + "destination": cluster.get_instance_ip("zoo2"), + "action": "REJECT --reject-with tcp-reset", + 
} + ) + + node4.query_with_retry("select * from system.zookeeper where path='/'") + assert "az2\n" != node4.query( + "select availability_zone from system.zookeeper_connection" + ) + + # fallback_session_lifetime.max is 1 second, but it shouldn't drop current session until the node becomes available + + time.sleep(5) # this is fine + assert 5 <= int(node4.query("select zookeeperSessionUptime()").strip()) + + pm.heal_all() + assert_eq_with_retry( + node4, "select availability_zone from system.zookeeper_connection", "az2\n" + ) + finally: + pm.heal_all() diff --git a/tests/integration/test_zookeeper_fallback_session/test.py b/tests/integration/test_zookeeper_fallback_session/test.py index 9afabfa3da3..932bbe482d2 100644 --- a/tests/integration/test_zookeeper_fallback_session/test.py +++ b/tests/integration/test_zookeeper_fallback_session/test.py @@ -84,10 +84,28 @@ def test_fallback_session(started_cluster: ClickHouseCluster): ) # at this point network partitioning has been reverted. - # the nodes should switch to zoo1 automatically because of `in_order` load-balancing. + # the nodes should switch to zoo1 because of `in_order` load-balancing. 
# otherwise they would connect to a random replica + + # but there's no reason to reconnect because current session works + # and there's no "optimal" node with `in_order` load-balancing + # so we need to break the current session + for node in [node1, node2, node3]: - assert_uses_zk_node(node, "zoo1") + assert_uses_zk_node(node, "zoo3") + + with PartitionManager() as pm: + for node in started_cluster.instances.values(): + pm._add_rule( + { + "source": node.ip_address, + "destination": cluster.get_instance_ip("zoo3"), + "action": "REJECT --reject-with tcp-reset", + } + ) + + for node in [node1, node2, node3]: + assert_uses_zk_node(node, "zoo1") node1.query_with_retry("INSERT INTO simple VALUES ({0}, {0})".format(2)) for node in [node2, node3]: diff --git a/tests/jepsen.clickhouse/resources/keeper_config.xml b/tests/jepsen.clickhouse/resources/keeper_config.xml index 6bc4ad89839..b5c0aac6a1b 100644 --- a/tests/jepsen.clickhouse/resources/keeper_config.xml +++ b/tests/jepsen.clickhouse/resources/keeper_config.xml @@ -33,6 +33,7 @@ 9181 {id} + 1 10000 diff --git a/tests/jepsen.clickhouse/resources/keeper_config_solo.xml b/tests/jepsen.clickhouse/resources/keeper_config_solo.xml index 0054cad8f85..6896beb9a4d 100644 --- a/tests/jepsen.clickhouse/resources/keeper_config_solo.xml +++ b/tests/jepsen.clickhouse/resources/keeper_config_solo.xml @@ -11,6 +11,7 @@ 9181 1 + 1 10000 diff --git a/tests/performance/least_greatest.xml b/tests/performance/least_greatest.xml new file mode 100644 index 00000000000..522bcd9004d --- /dev/null +++ b/tests/performance/least_greatest.xml @@ -0,0 +1,10 @@ + + CREATE TABLE test (id Int32, x1 Nullable(Int32), x2 Nullable(Float32)) ENGINE = MergeTree() ORDER BY id + + INSERT INTO test SELECT number, number+1, number + 2 FROM numbers(1000000) + + SELECT COUNT(1) FROM test WHERE least(x1, x2) > 1 + SELECT COUNT(1) FROM test WHERE GREATEST(x1, x2) > 1 + + DROP TABLE IF EXISTS test + \ No newline at end of file diff --git 
a/tests/performance/scripts/compare.sh b/tests/performance/scripts/compare.sh index 9a0fb5b335c..cb56ab6c5bf 100755 --- a/tests/performance/scripts/compare.sh +++ b/tests/performance/scripts/compare.sh @@ -87,6 +87,7 @@ function configure --path db0 --user_files_path db0/user_files --top_level_domains_path "$(left_or_right right top_level_domains)" + --keeper_server.storage_path coordination0 --tcp_port $LEFT_SERVER_PORT ) left/clickhouse-server "${setup_left_server_opts[@]}" &> setup-server-log.log & @@ -113,8 +114,12 @@ function configure rm -r db0/preprocessed_configs ||: rm -r db0/{data,metadata}/system ||: rm db0/status ||: + cp -al db0/ left/db/ + cp -R coordination0 left/coordination + cp -al db0/ right/db/ + cp -R coordination0 right/coordination } function restart @@ -135,6 +140,7 @@ function restart --tcp_port $LEFT_SERVER_PORT --keeper_server.tcp_port $LEFT_SERVER_KEEPER_PORT --keeper_server.raft_configuration.server.port $LEFT_SERVER_KEEPER_RAFT_PORT + --keeper_server.storage_path left/coordination --zookeeper.node.port $LEFT_SERVER_KEEPER_PORT --interserver_http_port $LEFT_SERVER_INTERSERVER_PORT ) @@ -154,6 +160,7 @@ function restart --tcp_port $RIGHT_SERVER_PORT --keeper_server.tcp_port $RIGHT_SERVER_KEEPER_PORT --keeper_server.raft_configuration.server.port $RIGHT_SERVER_KEEPER_RAFT_PORT + --keeper_server.storage_path right/coordination --zookeeper.node.port $RIGHT_SERVER_KEEPER_PORT --interserver_http_port $RIGHT_SERVER_INTERSERVER_PORT ) diff --git a/tests/queries/0_stateless/00098_k_union_all.sql b/tests/queries/0_stateless/00098_k_union_all.sql index 059d27075d7..280ad5ca2e6 100644 --- a/tests/queries/0_stateless/00098_k_union_all.sql +++ b/tests/queries/0_stateless/00098_k_union_all.sql @@ -1,4 +1,5 @@ SET output_format_pretty_color=1; +SET output_format_pretty_display_footer_column_names=0; SELECT 1 FORMAT PrettySpace; SELECT 1 UNION ALL SELECT 1 FORMAT PrettySpace; SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 FORMAT PrettySpace; diff --git 
a/tests/queries/0_stateless/00137_in_constants.reference b/tests/queries/0_stateless/00137_in_constants.reference index 379885fb1ab..94607ffa924 100644 --- a/tests/queries/0_stateless/00137_in_constants.reference +++ b/tests/queries/0_stateless/00137_in_constants.reference @@ -13,6 +13,7 @@ 1 1 1 +1 0 0 0 diff --git a/tests/queries/0_stateless/00137_in_constants.sql b/tests/queries/0_stateless/00137_in_constants.sql index 297acc4ef26..bc365523be1 100644 --- a/tests/queries/0_stateless/00137_in_constants.sql +++ b/tests/queries/0_stateless/00137_in_constants.sql @@ -13,6 +13,7 @@ SELECT 'Hello' IN (SELECT 'Hello'); SELECT materialize('Hello') IN (SELECT 'Hello'); SELECT 'Hello' IN (SELECT materialize('Hello')); SELECT materialize('Hello') IN (SELECT materialize('Hello')); +SELECT toDate('2020-01-01') IN (toDateTime('2020-01-01', 'UTC')); SELECT 2 IN (SELECT 1); SELECT materialize(2) IN (SELECT 1); diff --git a/tests/queries/0_stateless/00300_csv.reference b/tests/queries/0_stateless/00300_csv.reference index e7966a9e8d9..42cd22078c4 100644 --- a/tests/queries/0_stateless/00300_csv.reference +++ b/tests/queries/0_stateless/00300_csv.reference @@ -1,11 +1,11 @@ -"Hello, ""World""",123,"[1,2,3]","(456,['abc','def'])","Newline +"Hello, ""World""",123,"[1,2,3]",456,"['abc','def']","Newline here" "x","y","z","a","b" -"Hello, ""World""",123,"[1,2,3]","(456,['abc','def'])","Newline +"Hello, ""World""",123,"[1,2,3]",456,"['abc','def']","Newline here" "x","y","z","a","b" "String","UInt8","Array(UInt8)","Tuple(UInt16, Array(String))","String" -"Hello, ""World""",123,"[1,2,3]","(456,['abc','def'])","Newline +"Hello, ""World""",123,"[1,2,3]",456,"['abc','def']","Newline here" 0,"0","[]","2000-01-01","2000-01-01 00:00:00" 1,"1","[0]","2000-01-02","2000-01-01 00:00:01" diff --git a/tests/queries/0_stateless/00309_formats.reference b/tests/queries/0_stateless/00309_formats.reference index 5c0ae4d2c3b..e637ee0363a 100644 Binary files 
a/tests/queries/0_stateless/00309_formats.reference and b/tests/queries/0_stateless/00309_formats.reference differ diff --git a/tests/queries/0_stateless/00405_output_format_pretty_color.sql b/tests/queries/0_stateless/00405_output_format_pretty_color.sql index bc2d0c3adbf..de83567dd5d 100644 --- a/tests/queries/0_stateless/00405_output_format_pretty_color.sql +++ b/tests/queries/0_stateless/00405_output_format_pretty_color.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_display_footer_column_names=0; SET output_format_pretty_color = 0; SHOW SETTING output_format_pretty_color; diff --git a/tests/queries/0_stateless/00405_pretty_formats.sql b/tests/queries/0_stateless/00405_pretty_formats.sql index 00bb09a1c30..4715f4fc110 100644 --- a/tests/queries/0_stateless/00405_pretty_formats.sql +++ b/tests/queries/0_stateless/00405_pretty_formats.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_display_footer_column_names=0; SET output_format_pretty_color = 1; SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT Pretty; diff --git a/tests/queries/0_stateless/00476_pretty_formats_and_widths.sql b/tests/queries/0_stateless/00476_pretty_formats_and_widths.sql index 4bace207fb5..59809841726 100644 --- a/tests/queries/0_stateless/00476_pretty_formats_and_widths.sql +++ b/tests/queries/0_stateless/00476_pretty_formats_and_widths.sql @@ -1,4 +1,4 @@ -SET output_format_pretty_color=1, output_format_pretty_highlight_digit_groups=0; +SET output_format_pretty_color=1, output_format_pretty_highlight_digit_groups=0, output_format_pretty_display_footer_column_names=0; SELECT toUInt64(round(exp10(number))) AS x, toString(x) AS s FROM system.numbers LIMIT 10 FORMAT Pretty; SELECT toUInt64(round(exp10(number))) AS x, toString(x) AS s FROM system.numbers LIMIT 10 FORMAT PrettyCompact; SELECT toUInt64(round(exp10(number))) AS x, toString(x) AS s FROM system.numbers LIMIT 10 
FORMAT PrettySpace; diff --git a/tests/queries/0_stateless/00515_enhanced_time_zones.sql b/tests/queries/0_stateless/00515_enhanced_time_zones.sql index 837b0b4be20..f7eb90fa5c8 100644 --- a/tests/queries/0_stateless/00515_enhanced_time_zones.sql +++ b/tests/queries/0_stateless/00515_enhanced_time_zones.sql @@ -1,3 +1,5 @@ +SET allow_deprecated_snowflake_conversion_functions = 1; + SELECT addMonths(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 1, 'Asia/Kolkata'); SELECT addMonths(toDateTime('2017-11-05 10:37:47', 'Asia/Kolkata'), 1); SELECT addMonths(toTimeZone(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 'Asia/Kolkata'), 1); diff --git a/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.sql b/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.sql index 22f6da71247..494d8243534 100644 --- a/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.sql +++ b/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.sql @@ -2,6 +2,7 @@ set allow_deprecated_syntax_for_merge_tree=1; set optimize_on_insert = 0; +set optimize_trivial_insert_select = 1; drop table if exists mult_tab; create table mult_tab (date Date, value String, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(date, (date), 8192, sign, version); diff --git a/tests/queries/0_stateless/00569_parse_date_time_best_effort.sql b/tests/queries/0_stateless/00569_parse_date_time_best_effort.sql index 511addb4e4d..ca423c1922a 100644 --- a/tests/queries/0_stateless/00569_parse_date_time_best_effort.sql +++ b/tests/queries/0_stateless/00569_parse_date_time_best_effort.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_display_footer_column_names=0; SELECT s, parseDateTimeBestEffortOrNull(s, 'UTC') AS a, diff --git a/tests/queries/0_stateless/00576_nested_and_prewhere.sql b/tests/queries/0_stateless/00576_nested_and_prewhere.sql index 5916e679f1e..f5d9f0d6240 100644 --- a/tests/queries/0_stateless/00576_nested_and_prewhere.sql +++ 
b/tests/queries/0_stateless/00576_nested_and_prewhere.sql @@ -4,10 +4,10 @@ CREATE TABLE nested (x UInt64, filter UInt8, n Nested(a UInt64)) ENGINE = MergeT INSERT INTO nested SELECT number, number % 2, range(number % 10) FROM system.numbers LIMIT 100000; ALTER TABLE nested ADD COLUMN n.b Array(UInt64); -SELECT DISTINCT n.b FROM nested PREWHERE filter; +SELECT DISTINCT n.b FROM nested PREWHERE filter ORDER BY ALL; ALTER TABLE nested ADD COLUMN n.c Array(UInt64) DEFAULT arrayMap(x -> x * 2, n.a); -SELECT DISTINCT n.c FROM nested PREWHERE filter; -SELECT DISTINCT n.a, n.c FROM nested PREWHERE filter; +SELECT DISTINCT n.c FROM nested PREWHERE filter ORDER BY ALL; +SELECT DISTINCT n.a, n.c FROM nested PREWHERE filter ORDER BY ALL; DROP TABLE nested; diff --git a/tests/queries/0_stateless/00636_partition_key_parts_pruning.sh b/tests/queries/0_stateless/00636_partition_key_parts_pruning.sh index 7ec4d99f028..f2b4cae8bb0 100755 --- a/tests/queries/0_stateless/00636_partition_key_parts_pruning.sh +++ b/tests/queries/0_stateless/00636_partition_key_parts_pruning.sh @@ -11,9 +11,11 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE single_col_partition_key(x UInt32) EN ${CLICKHOUSE_CLIENT} --query="INSERT INTO single_col_partition_key VALUES (1), (2), (3), (4), (11), (12), (20)" -${CLICKHOUSE_CLIENT} --query="SELECT count() FROM single_col_partition_key WHERE x < 3 FORMAT XML" | grep -F rows_read | sed 's/^[ \t]*//g' -${CLICKHOUSE_CLIENT} --query="SELECT count() FROM single_col_partition_key WHERE x >= 11 FORMAT XML" | grep -F rows_read | sed 's/^[ \t]*//g' -${CLICKHOUSE_CLIENT} --query="SELECT count() FROM single_col_partition_key WHERE x = 20 FORMAT XML" | grep -F rows_read | sed 's/^[ \t]*//g' +DISABLE_COUNT_OPTIMIZATION="SETTINGS optimize_trivial_count_query = 0, optimize_use_implicit_projections = 0" + +${CLICKHOUSE_CLIENT} --query="SELECT count() FROM single_col_partition_key WHERE x < 3 FORMAT XML $DISABLE_COUNT_OPTIMIZATION" | grep -F rows_read | sed 's/^[ \t]*//g' 
+${CLICKHOUSE_CLIENT} --query="SELECT count() FROM single_col_partition_key WHERE x >= 11 FORMAT XML $DISABLE_COUNT_OPTIMIZATION" | grep -F rows_read | sed 's/^[ \t]*//g' +${CLICKHOUSE_CLIENT} --query="SELECT count() FROM single_col_partition_key WHERE x = 20 FORMAT XML $DISABLE_COUNT_OPTIMIZATION" | grep -F rows_read | sed 's/^[ \t]*//g' ${CLICKHOUSE_CLIENT} --query="DROP TABLE single_col_partition_key" @@ -31,14 +33,14 @@ ${CLICKHOUSE_CLIENT} --query="INSERT INTO composite_partition_key VALUES \ ${CLICKHOUSE_CLIENT} --query="INSERT INTO composite_partition_key VALUES \ (301, 20, 3), (302, 21, 3), (303, 22, 3)" -${CLICKHOUSE_CLIENT} --query="SELECT count() FROM composite_partition_key WHERE a > 400 FORMAT XML SETTINGS optimize_trivial_count_query = 0" | grep -F rows_read | sed 's/^[ \t]*//g' -${CLICKHOUSE_CLIENT} --query="SELECT count() FROM composite_partition_key WHERE b = 11 FORMAT XML SETTINGS optimize_trivial_count_query = 0" | grep -F rows_read | sed 's/^[ \t]*//g' -${CLICKHOUSE_CLIENT} --query="SELECT count() FROM composite_partition_key WHERE c = 4 FORMAT XML SETTINGS optimize_trivial_count_query = 0" | grep -F rows_read | sed 's/^[ \t]*//g' +${CLICKHOUSE_CLIENT} --query="SELECT count() FROM composite_partition_key WHERE a > 400 FORMAT XML $DISABLE_COUNT_OPTIMIZATION" | grep -F rows_read | sed 's/^[ \t]*//g' +${CLICKHOUSE_CLIENT} --query="SELECT count() FROM composite_partition_key WHERE b = 11 FORMAT XML $DISABLE_COUNT_OPTIMIZATION" | grep -F rows_read | sed 's/^[ \t]*//g' +${CLICKHOUSE_CLIENT} --query="SELECT count() FROM composite_partition_key WHERE c = 4 FORMAT XML $DISABLE_COUNT_OPTIMIZATION" | grep -F rows_read | sed 's/^[ \t]*//g' -${CLICKHOUSE_CLIENT} --query="SELECT count() FROM composite_partition_key WHERE a < 200 AND c = 2 FORMAT XML SETTINGS optimize_trivial_count_query = 0" | grep -F rows_read | sed 's/^[ \t]*//g' -${CLICKHOUSE_CLIENT} --query="SELECT count() FROM composite_partition_key WHERE a = 301 AND b < 20 FORMAT XML SETTINGS 
optimize_trivial_count_query = 0" | grep -F rows_read | sed 's/^[ \t]*//g' -${CLICKHOUSE_CLIENT} --query="SELECT count() FROM composite_partition_key WHERE b >= 12 AND c = 2 FORMAT XML SETTINGS optimize_trivial_count_query = 0" | grep -F rows_read | sed 's/^[ \t]*//g' +${CLICKHOUSE_CLIENT} --query="SELECT count() FROM composite_partition_key WHERE a < 200 AND c = 2 FORMAT XML $DISABLE_COUNT_OPTIMIZATION" | grep -F rows_read | sed 's/^[ \t]*//g' +${CLICKHOUSE_CLIENT} --query="SELECT count() FROM composite_partition_key WHERE a = 301 AND b < 20 FORMAT XML $DISABLE_COUNT_OPTIMIZATION" | grep -F rows_read | sed 's/^[ \t]*//g' +${CLICKHOUSE_CLIENT} --query="SELECT count() FROM composite_partition_key WHERE b >= 12 AND c = 2 FORMAT XML $DISABLE_COUNT_OPTIMIZATION" | grep -F rows_read | sed 's/^[ \t]*//g' -${CLICKHOUSE_CLIENT} --query="SELECT count() FROM composite_partition_key WHERE a = 301 AND b = 21 AND c = 3 FORMAT XML SETTINGS optimize_trivial_count_query = 0" | grep -F rows_read | sed 's/^[ \t]*//g' +${CLICKHOUSE_CLIENT} --query="SELECT count() FROM composite_partition_key WHERE a = 301 AND b = 21 AND c = 3 FORMAT XML $DISABLE_COUNT_OPTIMIZATION" | grep -F rows_read | sed 's/^[ \t]*//g' ${CLICKHOUSE_CLIENT} --query="DROP TABLE composite_partition_key" diff --git a/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh b/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh index e10b2f86145..7766d7720e1 100755 --- a/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh +++ b/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh @@ -45,6 +45,7 @@ ${CLICKHOUSE_CLIENT} --query="INSERT INTO enum_test_table VALUES ('hello'), ('wo ${CLICKHOUSE_CLIENT} --query="INSERT INTO date_test_table VALUES (1), (2), (2), (256), (257), (257);" CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g') +CLICKHOUSE_CLIENT="${CLICKHOUSE_CLIENT} 
--optimize_use_implicit_projections 0" ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM string_test_table WHERE toUInt64(val) == 0;" 2>&1 |grep -q "3 marks to read from 1 ranges" && echo "no monotonic int case: String -> UInt64" ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM fixed_string_test_table WHERE toUInt64(val) == 0;" 2>&1 |grep -q "3 marks to read from 1 ranges" && echo "no monotonic int case: FixedString -> UInt64" diff --git a/tests/queries/0_stateless/00695_pretty_max_column_pad_width.sql b/tests/queries/0_stateless/00695_pretty_max_column_pad_width.sql index 9a421361b71..24dd9336a88 100644 --- a/tests/queries/0_stateless/00695_pretty_max_column_pad_width.sql +++ b/tests/queries/0_stateless/00695_pretty_max_column_pad_width.sql @@ -1,2 +1,3 @@ +SET output_format_pretty_display_footer_column_names=0; SET output_format_pretty_max_column_pad_width = 250; SELECT range(number) FROM system.numbers LIMIT 100 FORMAT PrettyCompactNoEscapes; diff --git a/tests/queries/0_stateless/00727_concat.reference b/tests/queries/0_stateless/00727_concat.reference index 6fb23c072d3..329ad36ad3c 100644 --- a/tests/queries/0_stateless/00727_concat.reference +++ b/tests/queries/0_stateless/00727_concat.reference @@ -72,3 +72,6 @@ foo \N \N Testing the alias +-- Empty argument tests + +String diff --git a/tests/queries/0_stateless/00727_concat.sql b/tests/queries/0_stateless/00727_concat.sql index 01792545b5a..76dae541261 100644 --- a/tests/queries/0_stateless/00727_concat.sql +++ b/tests/queries/0_stateless/00727_concat.sql @@ -93,4 +93,6 @@ SELECT concat(materialize(NULL :: Nullable(UInt64))); SELECT CONCAT('Testing the ', 'alias'); -SELECT concat(); -- { serverError TOO_FEW_ARGUMENTS_FOR_FUNCTION } +SELECT '-- Empty argument tests'; +SELECT concat(); +select toTypeName(concat()); diff --git a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index 1bb4dbd34de..af746c43da9 100755 
--- a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage +# Tags: long, no-s3-storage, no-tsan # no-s3 because read FileOpen metric set -e @@ -31,6 +31,6 @@ $CLICKHOUSE_CLIENT $settings -q "$touching_many_parts_query" &> /dev/null $CLICKHOUSE_CLIENT $settings -q "SYSTEM FLUSH LOGS" -$CLICKHOUSE_CLIENT $settings -q "SELECT ProfileEvents['FileOpen'] as opened_files FROM system.query_log WHERE query='$touching_many_parts_query' and current_database = currentDatabase() ORDER BY event_time DESC, opened_files DESC LIMIT 1;" +$CLICKHOUSE_CLIENT $settings -q "SELECT ProfileEvents['FileOpen'] as opened_files FROM system.query_log WHERE query = '$touching_many_parts_query' AND current_database = currentDatabase() AND event_date >= yesterday() ORDER BY event_time DESC, opened_files DESC LIMIT 1;" $CLICKHOUSE_CLIENT $settings -q "DROP TABLE IF EXISTS merge_tree_table;" diff --git a/tests/queries/0_stateless/00732_base64_functions.sql b/tests/queries/0_stateless/00732_base64_functions.sql index 3c60bf939fe..b4be8db4ede 100644 --- a/tests/queries/0_stateless/00732_base64_functions.sql +++ b/tests/queries/0_stateless/00732_base64_functions.sql @@ -1,6 +1,5 @@ -- Tags: no-fasttest - -SET send_logs_level = 'fatal'; +-- no-fasttest because aklomp-base64 library is required SELECT base64Encode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT base64Decode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } diff --git a/tests/queries/0_stateless/00763_lock_buffer_long.sh b/tests/queries/0_stateless/00763_lock_buffer_long.sh index 50680724149..046e4efaa85 100755 --- a/tests/queries/0_stateless/00763_lock_buffer_long.sh +++ b/tests/queries/0_stateless/00763_lock_buffer_long.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-s3-storage, no-msan, no-asan, no-tsan, no-debug +# Some kind of stress 
test, it doesn't make sense to test in a non-release build set -e @@ -15,7 +16,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE buffer_00763_2 (s String) ENGINE = Bu function thread1() { - seq 1 500 | sed -r -e 's/.+/DROP TABLE IF EXISTS mt_00763_2; CREATE TABLE mt_00763_2 (s String) ENGINE = MergeTree ORDER BY s; INSERT INTO mt_00763_2 SELECT toString(number) FROM numbers(10);/' | ${CLICKHOUSE_CLIENT} --multiquery --ignore-error ||: + seq 1 500 | sed -r -e 's/.+/DROP TABLE IF EXISTS mt_00763_2; CREATE TABLE mt_00763_2 (s String) ENGINE = MergeTree ORDER BY s; INSERT INTO mt_00763_2 SELECT toString(number) FROM numbers(10);/' | ${CLICKHOUSE_CLIENT} --fsync-metadata 0 --multiquery --ignore-error ||: } function thread2() diff --git a/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.sql b/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.sql index 1e3b24e60c0..7c2b75cdd70 100644 --- a/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.sql +++ b/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_display_footer_column_names=0; SELECT s, parseDateTimeBestEffortOrNull(s, 'UTC') AS a, diff --git a/tests/queries/0_stateless/00830_join_overwrite.reference b/tests/queries/0_stateless/00830_join_overwrite.reference index 4792e70f333..e7d6081b647 100644 --- a/tests/queries/0_stateless/00830_join_overwrite.reference +++ b/tests/queries/0_stateless/00830_join_overwrite.reference @@ -1,2 +1,4 @@ 2 3 +2 +3 diff --git a/tests/queries/0_stateless/00830_join_overwrite.sql b/tests/queries/0_stateless/00830_join_overwrite.sql index cb7e277906b..bc3662528db 100644 --- a/tests/queries/0_stateless/00830_join_overwrite.sql +++ b/tests/queries/0_stateless/00830_join_overwrite.sql @@ -9,5 +9,14 @@ INSERT INTO kv_overwrite VALUES (1, 2); INSERT INTO kv_overwrite VALUES (1, 3); SELECT joinGet('kv_overwrite', 'v', toUInt32(1)); + +CREATE TABLE t2 (k UInt32, v UInt32) ENGINE = Memory; +INSERT 
INTO t2 VALUES (1, 2), (1, 3); + +SET allow_experimental_analyzer = 1; + +SELECT v FROM (SELECT 1 as k) t1 ANY INNER JOIN t2 USING (k) SETTINGS join_any_take_last_row = 0; +SELECT v FROM (SELECT 1 as k) t1 ANY INNER JOIN t2 USING (k) SETTINGS join_any_take_last_row = 1; + DROP TABLE kv; DROP TABLE kv_overwrite; diff --git a/tests/queries/0_stateless/00967_ubsan_bit_test.reference b/tests/queries/0_stateless/00967_ubsan_bit_test.reference deleted file mode 100644 index 573541ac970..00000000000 --- a/tests/queries/0_stateless/00967_ubsan_bit_test.reference +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/tests/queries/0_stateless/00967_ubsan_bit_test.sql b/tests/queries/0_stateless/00967_ubsan_bit_test.sql deleted file mode 100644 index 1682e725670..00000000000 --- a/tests/queries/0_stateless/00967_ubsan_bit_test.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT sum(ignore(bitTest(number, 65))) FROM numbers(10); diff --git a/tests/queries/0_stateless/00987_distributed_stack_overflow.sql b/tests/queries/0_stateless/00987_distributed_stack_overflow.sql index 5a22ac56413..ba58713fe0e 100644 --- a/tests/queries/0_stateless/00987_distributed_stack_overflow.sql +++ b/tests/queries/0_stateless/00987_distributed_stack_overflow.sql @@ -9,10 +9,6 @@ CREATE TABLE distr (x UInt8) ENGINE = Distributed(test_shard_localhost, currentD CREATE TABLE distr0 (x UInt8) ENGINE = Distributed(test_shard_localhost, '', distr0); -- { serverError INFINITE_LOOP } CREATE TABLE distr1 (x UInt8) ENGINE = Distributed(test_shard_localhost, currentDatabase(), distr2); -CREATE TABLE distr2 (x UInt8) ENGINE = Distributed(test_shard_localhost, currentDatabase(), distr1); - -SELECT * FROM distr1; -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } -SELECT * FROM distr2; -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } +CREATE TABLE distr2 (x UInt8) ENGINE = Distributed(test_shard_localhost, currentDatabase(), distr1); -- { serverError INFINITE_LOOP } DROP TABLE distr1; -DROP TABLE distr2; diff --git 
a/tests/queries/0_stateless/00997_set_index_array.sql b/tests/queries/0_stateless/00997_set_index_array.sql index 34d0f0b71ec..d6d27f5a6a0 100644 --- a/tests/queries/0_stateless/00997_set_index_array.sql +++ b/tests/queries/0_stateless/00997_set_index_array.sql @@ -17,6 +17,8 @@ select from system.numbers limit 10000000; +OPTIMIZE TABLE set_array FINAL; + SET max_rows_to_read = 8192; select count() from set_array where has(index_array, 333); diff --git a/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh b/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh index 16ebf2e6e54..c3ad29d33a1 100755 --- a/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh +++ b/tests/queries/0_stateless/01006_simpod_empty_part_single_column_write.sh @@ -18,7 +18,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE table_with_empty_part ENGINE = MergeTree() ORDER BY id PARTITION BY id -SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0, remove_empty_parts = 0 +SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0, remove_empty_parts = 0, min_bytes_for_wide_part=0, min_bytes_for_full_part_storage = 0 " diff --git a/tests/queries/0_stateless/01016_input_null_as_default.sh b/tests/queries/0_stateless/01016_input_null_as_default.sh index 8d6a9a07435..24d93b2703c 100755 --- a/tests/queries/0_stateless/01016_input_null_as_default.sh +++ b/tests/queries/0_stateless/01016_input_null_as_default.sh @@ -11,8 +11,8 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE default_by_other_column (a Float32 DEFA echo 'CSV' echo '\N, 1, \N, "2019-07-22", "[10, 20, 30]", \N -1, world, 3, "2019-07-23", \N, "('\''tuple'\'', 3.14)" -2, \N, 123, \N, "[]", "('\''test'\'', 2.71828)" +1, world, 3, "2019-07-23", \N, tuple, 3.14 +2, \N, 123, \N, "[]", test, 2.71828 3, \N, \N, \N, \N, \N' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 
--query="INSERT INTO null_as_default FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i"; $CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default"; diff --git a/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh b/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh index 3a2eac1f38f..26c2e54e637 100755 --- a/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh +++ b/tests/queries/0_stateless/01019_alter_materialized_view_consistent.sh @@ -54,10 +54,10 @@ function alter_thread() { for i in {0..5}; do ALTER[$i]="ALTER TABLE mv MODIFY QUERY SELECT v == 1 as test, v as case FROM src_a;" done - # Insert 3 ALTERs to src_b, one in the first half of the array and two in arbitrary positions. - ALTER[$RANDOM % 3]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" - ALTER[$RANDOM % 6]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" - ALTER[$RANDOM % 6]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" + # Insert 3 ALTERs to src_b randomly in each third of array. 
+ ALTER[$RANDOM % 2]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" + ALTER[$RANDOM % 2 + 2]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" + ALTER[$RANDOM % 2 + 4]="ALTER TABLE mv MODIFY QUERY SELECT v == 2 as test, v as case FROM src_b;" i=0 while true; do diff --git a/tests/queries/0_stateless/01042_check_query_and_last_granule_size.sql b/tests/queries/0_stateless/01042_check_query_and_last_granule_size.sql index eccb2d25878..7b7d1706346 100644 --- a/tests/queries/0_stateless/01042_check_query_and_last_granule_size.sql +++ b/tests/queries/0_stateless/01042_check_query_and_last_granule_size.sql @@ -1,4 +1,6 @@ +SET optimize_trivial_insert_select = 1; SET check_query_single_value_result = 0; + DROP TABLE IF EXISTS check_query_test; CREATE TABLE check_query_test (SomeKey UInt64, SomeValue String) ENGINE = MergeTree() ORDER BY SomeKey SETTINGS min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0; diff --git a/tests/queries/0_stateless/01045_order_by_pk_special_storages.sh b/tests/queries/0_stateless/01045_order_by_pk_special_storages.sh index 12421a2b308..0714b4c91ed 100755 --- a/tests/queries/0_stateless/01045_order_by_pk_special_storages.sh +++ b/tests/queries/0_stateless/01045_order_by_pk_special_storages.sh @@ -16,8 +16,8 @@ $CLICKHOUSE_CLIENT -q "CREATE TABLE s1 (a UInt32, s String) ENGINE = MergeTree O $CLICKHOUSE_CLIENT -q "CREATE TABLE s2 (a UInt32, s String) ENGINE = MergeTree ORDER BY a PARTITION BY a % 3 SETTINGS min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0" $CLICKHOUSE_CLIENT -q "CREATE TABLE m (a UInt32, s String) engine = Merge('$CLICKHOUSE_DATABASE', 's[1,2]')" -$CLICKHOUSE_CLIENT -q "INSERT INTO s1 select (number % 20) * 2 as n, toString(number * number) from numbers(100000)" -$CLICKHOUSE_CLIENT -q "INSERT INTO s2 select (number % 20) * 2 + 1 as n, toString(number * number * number) from numbers(100000)" +$CLICKHOUSE_CLIENT --optimize_trivial_insert_select 1 -q "INSERT INTO s1 
select (number % 20) * 2 as n, toString(number * number) from numbers(100000)" +$CLICKHOUSE_CLIENT --optimize_trivial_insert_select 1 -q "INSERT INTO s2 select (number % 20) * 2 + 1 as n, toString(number * number * number) from numbers(100000)" $CLICKHOUSE_CLIENT -q "SELECT '---StorageMerge---'" $CLICKHOUSE_CLIENT -q "SELECT a FROM m ORDER BY a LIMIT 5" diff --git a/tests/queries/0_stateless/01045_zookeeper_system_mutations_with_parts_names.sh b/tests/queries/0_stateless/01045_zookeeper_system_mutations_with_parts_names.sh index cd6501bbebf..1185498a5f7 100755 --- a/tests/queries/0_stateless/01045_zookeeper_system_mutations_with_parts_names.sh +++ b/tests/queries/0_stateless/01045_zookeeper_system_mutations_with_parts_names.sh @@ -25,7 +25,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE table_for_mutations(k UInt32, v1 UInt ${CLICKHOUSE_CLIENT} --query="SYSTEM STOP MERGES table_for_mutations" -${CLICKHOUSE_CLIENT} --query="INSERT INTO table_for_mutations select number, number from numbers(100000)" +${CLICKHOUSE_CLIENT} --optimize_trivial_insert_select 1 --query="INSERT INTO table_for_mutations select number, number from numbers(100000)" ${CLICKHOUSE_CLIENT} --query="SELECT sum(v1) FROM table_for_mutations" @@ -53,7 +53,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE replicated_table_for_mutations(k UInt ${CLICKHOUSE_CLIENT} --query="SYSTEM STOP MERGES replicated_table_for_mutations" # test relays on part ids, which are non-deterministic with keeper fault injections, so disable it -${CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --query="INSERT INTO replicated_table_for_mutations select number, number from numbers(100000)" +${CLICKHOUSE_CLIENT} --optimize_trivial_insert_select 1 --insert_keeper_fault_injection_probability=0 --query="INSERT INTO replicated_table_for_mutations select number, number from numbers(100000)" ${CLICKHOUSE_CLIENT} --query="SELECT sum(v1) FROM replicated_table_for_mutations" diff --git 
a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py index 2db14fcdddf..e65650816ab 100755 --- a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py +++ b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py @@ -49,16 +49,16 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01056_window_view_proc_hop_watch.wv") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01056_window_view_proc_hop_watch.mt VALUES (1, now('US/Samoa') + 3)" ) client1.expect("1" + end_of_block) - client1.expect("Progress: 1.00 rows.*\)") + client1.expect("Progress: 1.00 rows.*\\)") # send Ctrl-C client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + match = client1.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py index 2323ee5c838..3dbb176b0dc 100755 --- a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py +++ b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py @@ -47,7 +47,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH db_01059_event_hop_watch_strict_asc.wv") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO db_01059_event_hop_watch_strict_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) @@ -57,7 +57,7 @@ with client(name="client1>", log=log) as client1, client( ) client2.expect("Ok.") client1.expect("1*1990-01-01 12:00:02" + end_of_block) - client1.expect("Progress: 1.00 rows.*\)") + client1.expect("Progress: 
1.00 rows.*\\)") client2.send( "INSERT INTO db_01059_event_hop_watch_strict_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:10', 'US/Samoa'));" @@ -65,11 +65,11 @@ with client(name="client1>", log=log) as client1, client( client2.expect("Ok.") client1.expect("1*1990-01-01 12:00:06" + end_of_block) client1.expect("1*1990-01-01 12:00:08" + end_of_block) - client1.expect("Progress: 3.00 rows.*\)") + client1.expect("Progress: 3.00 rows.*\\)") # send Ctrl-C client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + match = client1.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py index db9e8cef6c5..d6cc3ee1a88 100755 --- a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py +++ b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py @@ -49,7 +49,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01062_window_view_event_hop_watch_asc.wv") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) @@ -69,11 +69,11 @@ with client(name="client1>", log=log) as client1, client( client2.expect(prompt) client1.expect("1" + end_of_block) client1.expect("2" + end_of_block) - client1.expect("Progress: 3.00 rows.*\)") + client1.expect("Progress: 3.00 rows.*\\)") # send Ctrl-C client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + match = client1.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py 
b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py index b8d5ff02d37..e5f9ab59f60 100755 --- a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py +++ b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py @@ -50,7 +50,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01065_window_view_event_hop_watch_bounded.wv") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:00');" ) @@ -72,7 +72,7 @@ with client(name="client1>", log=log) as client1, client( # send Ctrl-C client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + match = client1.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py index 21c2e831afc..8c3a46992dc 100755 --- a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py +++ b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py @@ -49,23 +49,23 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01069_window_view_proc_tumble_watch.wv") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01069_window_view_proc_tumble_watch.mt VALUES (1, now('US/Samoa') + 3)" ) client2.expect("Ok.") client1.expect("1" + end_of_block) - client1.expect("Progress: 1.00 rows.*\)") + client1.expect("Progress: 1.00 rows.*\\)") client2.send( "INSERT INTO 01069_window_view_proc_tumble_watch.mt VALUES (1, now('US/Samoa') + 3)" ) client2.expect("Ok.") client1.expect("1" + end_of_block) - client1.expect("Progress: 2.00 rows.*\)") + 
client1.expect("Progress: 2.00 rows.*\\)") # send Ctrl-C client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + match = client1.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01070_window_view_watch_events.py b/tests/queries/0_stateless/01070_window_view_watch_events.py index 1cf7678a014..172a82a29da 100755 --- a/tests/queries/0_stateless/01070_window_view_watch_events.py +++ b/tests/queries/0_stateless/01070_window_view_watch_events.py @@ -49,7 +49,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01070_window_view_watch_events.wv EVENTS") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01070_window_view_watch_events.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) @@ -59,11 +59,11 @@ with client(name="client1>", log=log) as client1, client( ) client2.expect("Ok.") client1.expect("1990-01-01 12:00:05" + end_of_block) - client1.expect("Progress: 1.00 rows.*\)") + client1.expect("Progress: 1.00 rows.*\\)") # send Ctrl-C client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + match = client1.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py index 3f3dfe0cda8..05aeb1b4ccb 100755 --- a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py +++ b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py @@ -55,7 +55,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01078_window_view_alter_query_watch.wv") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + 
client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) @@ -65,7 +65,7 @@ with client(name="client1>", log=log) as client1, client( ) client2.expect("Ok.") client1.expect("1" + end_of_block) - client1.expect("Progress: 1.00 rows.*\)") + client1.expect("Progress: 1.00 rows.*\\)") client2.send( "ALTER TABLE 01078_window_view_alter_query_watch.wv MODIFY QUERY SELECT count(a) * 2 AS count, hopEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" ) @@ -75,7 +75,7 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client3.send("WATCH 01078_window_view_alter_query_watch.wv") client3.expect("Query id" + end_of_block) - client3.expect("Progress: 0.00 rows.*\)") + client3.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, toDateTime('1990/01/01 12:00:06', 'US/Samoa'));" ) @@ -85,11 +85,11 @@ with client(name="client1>", log=log) as client1, client( ) client2.expect("Ok.") client3.expect("2" + end_of_block) - client3.expect("Progress: 1.00 rows.*\)") + client3.expect("Progress: 1.00 rows.*\\)") # send Ctrl-C client3.send("\x03", eol="") - match = client3.expect("(%s)|([#\$] )" % prompt) + match = client3.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client3.send(client3.command) client3.expect(prompt) diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference index 708c5d9d994..26085389381 100644 --- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference +++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.reference @@ -1,3 +1,22 @@ +-- bitTestAny +0 1 +1 0 +2 1 +3 0 +4 1 +5 0 +6 1 +7 0 +-- bitTestAll +0 1 +1 0 +2 1 +3 0 +4 1 +5 0 +6 1 +7 0 +-- bitTest 0 1 1 0 2 1 @@ 
-6,98 +25,6 @@ 5 0 6 1 7 0 -8 0 -9 0 -10 0 -11 0 -12 0 -13 0 -14 0 -15 0 -16 0 -17 0 -18 0 -19 0 -20 0 -21 0 -22 0 -23 0 -24 0 -25 0 -26 0 -27 0 -28 0 -29 0 -30 0 -31 0 -32 0 -33 0 -34 0 -35 0 -36 0 -37 0 -38 0 -39 0 -40 0 -41 0 -42 0 -43 0 -44 0 -45 0 -46 0 -47 0 -48 0 -49 0 -50 0 -51 0 -52 0 -53 0 -54 0 -55 0 -56 0 -57 0 -58 0 -59 0 -60 0 -61 0 -62 0 -63 0 -64 0 -65 0 -66 0 -67 0 -68 0 -69 0 -70 0 -71 0 -72 0 -73 0 -74 0 -75 0 -76 0 -77 0 -78 0 -79 0 -80 0 -81 0 -82 0 -83 0 -84 0 -85 0 -86 0 -87 0 -88 0 -89 0 -90 0 -91 0 -92 0 -93 0 -94 0 -95 0 -96 0 -97 0 -98 0 -99 0 0 1 1 0 2 1 @@ -107,94 +34,10 @@ 6 1 7 0 8 1 -9 1 +9 0 10 1 -11 1 +11 0 12 1 -13 1 +13 0 14 1 -15 1 -16 1 -17 1 -18 1 -19 1 -20 1 -21 1 -22 1 -23 1 -24 1 -25 1 -26 1 -27 1 -28 1 -29 1 -30 1 -31 1 -32 1 -33 1 -34 1 -35 1 -36 1 -37 1 -38 1 -39 1 -40 1 -41 1 -42 1 -43 1 -44 1 -45 1 -46 1 -47 1 -48 1 -49 1 -50 1 -51 1 -52 1 -53 1 -54 1 -55 1 -56 1 -57 1 -58 1 -59 1 -60 1 -61 1 -62 1 -63 1 -64 1 -65 1 -66 1 -67 1 -68 1 -69 1 -70 1 -71 1 -72 1 -73 1 -74 1 -75 1 -76 1 -77 1 -78 1 -79 1 -80 1 -81 1 -82 1 -83 1 -84 1 -85 1 -86 1 -87 1 -88 1 -89 1 -90 1 -91 1 -92 1 -93 1 -94 1 -95 1 -96 1 -97 1 -98 1 -99 1 +15 0 diff --git a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql index 82e2c5a2380..e741cb249d0 100644 --- a/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql +++ b/tests/queries/0_stateless/01082_bit_test_out_of_bound.sql @@ -1,2 +1,13 @@ -SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(100); -SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(100); +SELECT '-- bitTestAny'; +SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); +SELECT number, bitTestAny(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } + +SELECT '-- bitTestAll'; +SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); 
+SELECT number, bitTestAll(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } + +SELECT '-- bitTest'; +SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8); +SELECT number, bitTest(toUInt8(1 + 4 + 16 + 64), number) FROM numbers(8, 16); -- { serverError PARAMETER_OUT_OF_BOUND } +SELECT number, bitTest(toUInt16(1 + 4 + 16 + 64 + 256 + 1024 + 4096 + 16384 + 65536), number) FROM numbers(16); +SELECT -number, bitTest(toUInt16(1), -number) FROM numbers(8); -- { serverError PARAMETER_OUT_OF_BOUND } diff --git a/tests/queries/0_stateless/01082_window_view_watch_limit.py b/tests/queries/0_stateless/01082_window_view_watch_limit.py index 9938ebcab98..5dcdfdb5020 100755 --- a/tests/queries/0_stateless/01082_window_view_watch_limit.py +++ b/tests/queries/0_stateless/01082_window_view_watch_limit.py @@ -49,7 +49,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01082_window_view_watch_limit.wv LIMIT 1") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01082_window_view_watch_limit.mt VALUES (1, '1990/01/01 12:00:00');" ) @@ -59,7 +59,7 @@ with client(name="client1>", log=log) as client1, client( ) client2.expect("Ok.") client1.expect("1" + end_of_block) - client1.expect("Progress: 1.00 rows.*\)") + client1.expect("Progress: 1.00 rows.*\\)") client1.expect("1 row" + end_of_block) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql index 6268765aa27..bdfbf2a47cf 100644 --- a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql +++ b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql @@ -88,6 +88,7 @@ SELECT sum(n) from rich_syntax; SYSTEM DROP DNS CACHE; DROP TABLE file; +DROP DICTIONARY dict; DROP TABLE url; DROP TABLE 
view; DROP TABLE buffer; @@ -96,4 +97,3 @@ DROP TABLE merge_tf; DROP TABLE distributed; DROP TABLE distributed_tf; DROP TABLE rich_syntax; -DROP DICTIONARY dict; diff --git a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference index 8984d35930a..03ed07cf1a4 100644 --- a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference +++ b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference @@ -2,6 +2,4 @@ Instruction check fail. The CPU does not support SSSE3 instruction set. Instruction check fail. The CPU does not support SSE4.1 instruction set. Instruction check fail. The CPU does not support SSE4.2 instruction set. Instruction check fail. The CPU does not support POPCNT instruction set. -: MADV_DONTNEED does not work (memset will be used instead) -: (This is the expected behaviour if you are running under QEMU) 1 diff --git a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh index 01047aeb9ab..c37f1f95374 100755 --- a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh +++ b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest, no-cpu-aarch64 -# Tag no-fasttest: avoid dependency on qemu -- invonvenient when running locally +# Tag no-fasttest: avoid dependency on qemu -- inconvenient when running locally CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01191_rename_dictionary.sql b/tests/queries/0_stateless/01191_rename_dictionary.sql index 6666c3308ca..c5012dabc81 100644 --- a/tests/queries/0_stateless/01191_rename_dictionary.sql +++ b/tests/queries/0_stateless/01191_rename_dictionary.sql @@ -17,7 +17,7 @@ SELECT name, 
status FROM system.dictionaries WHERE database='test_01191'; SELECT name, engine FROM system.tables WHERE database='test_01191' ORDER BY name; RENAME DICTIONARY test_01191.table TO test_01191.table1; -- {serverError UNKNOWN_TABLE} -EXCHANGE DICTIONARIES test_01191._ AND test_01191.dict; -- {serverError INCORRECT_QUERY} +EXCHANGE DICTIONARIES test_01191._ AND test_01191.dict; -- {serverError INFINITE_LOOP} EXCHANGE TABLES test_01191.t AND test_01191.dict; SELECT name, status FROM system.dictionaries WHERE database='test_01191'; SELECT name, engine FROM system.tables WHERE database='test_01191' ORDER BY name; diff --git a/tests/queries/0_stateless/01200_mutations_memory_consumption.sql b/tests/queries/0_stateless/01200_mutations_memory_consumption.sql index 2266da5fc8f..5019abc38ab 100644 --- a/tests/queries/0_stateless/01200_mutations_memory_consumption.sql +++ b/tests/queries/0_stateless/01200_mutations_memory_consumption.sql @@ -1,4 +1,5 @@ -- Tags: no-debug, no-parallel, long, no-s3-storage, no-random-settings, no-random-merge-tree-settings +SET optimize_trivial_insert_select = 1; DROP TABLE IF EXISTS table_with_single_pk; diff --git a/tests/queries/0_stateless/01246_buffer_flush.sql b/tests/queries/0_stateless/01246_buffer_flush.sql index 36bcaae383f..66f93371c29 100644 --- a/tests/queries/0_stateless/01246_buffer_flush.sql +++ b/tests/queries/0_stateless/01246_buffer_flush.sql @@ -9,14 +9,14 @@ create table data_01256 as system.numbers Engine=Memory(); select 'min'; create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 2, 100, /* time */ + 5, 100, /* time */ 4, 100, /* rows */ 1, 1e6 /* bytes */ ); insert into buffer_01256 select * from system.numbers limit 5; select count() from data_01256; --- sleep 2 (min time) + 1 (round up) + bias (1) = 4 -select sleepEachRow(2) from numbers(2) FORMAT Null; +-- It is enough to ensure that the buffer will be flushed earlier then 2*min_time (10 sec) +select sleepEachRow(9) FORMAT Null 
SETTINGS function_sleep_max_microseconds_per_block=10e6; select count() from data_01256; drop table buffer_01256; diff --git a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.reference b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.reference index 2f2d638a294..9c2c59f6379 100644 --- a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.reference +++ b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.reference @@ -1,4 +1,4 @@ -0 NOT_LOADED +NOT_LOADED 0 LOADED 10 1 LOADED diff --git a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql index c11cb64735c..206ddeac612 100644 --- a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql +++ b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql @@ -17,7 +17,7 @@ LAYOUT(FLAT()); DETACH DATABASE dict_db_01254; ATTACH DATABASE dict_db_01254; -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; +SELECT COALESCE((SELECT status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict')::Nullable(String), 'NOT_LOADED'); SYSTEM RELOAD DICTIONARY dict_db_01254.dict; SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; SELECT dictGetUInt64('dict_db_01254.dict', 'val', toUInt64(0)); diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 2a7ceab57ba..17554f5c8a5 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -24,10 +24,11 @@ ALTER DROP INDEX ['DROP INDEX'] TABLE ALTER INDEX ALTER MATERIALIZE INDEX ['MATERIALIZE INDEX'] TABLE ALTER INDEX ALTER CLEAR INDEX ['CLEAR INDEX'] TABLE ALTER INDEX ALTER INDEX ['INDEX'] \N ALTER TABLE -ALTER ADD STATISTIC ['ALTER ADD STATISTIC'] TABLE ALTER 
STATISTIC -ALTER DROP STATISTIC ['ALTER DROP STATISTIC'] TABLE ALTER STATISTIC -ALTER MATERIALIZE STATISTIC ['ALTER MATERIALIZE STATISTIC'] TABLE ALTER STATISTIC -ALTER STATISTIC ['STATISTIC'] \N ALTER TABLE +ALTER ADD STATISTICS ['ALTER ADD STATISTIC'] TABLE ALTER STATISTICS +ALTER DROP STATISTICS ['ALTER DROP STATISTIC'] TABLE ALTER STATISTICS +ALTER MODIFY STATISTICS ['ALTER MODIFY STATISTIC'] TABLE ALTER STATISTICS +ALTER MATERIALIZE STATISTICS ['ALTER MATERIALIZE STATISTIC'] TABLE ALTER STATISTICS +ALTER STATISTICS ['STATISTIC'] \N ALTER TABLE ALTER ADD PROJECTION ['ADD PROJECTION'] TABLE ALTER PROJECTION ALTER DROP PROJECTION ['DROP PROJECTION'] TABLE ALTER PROJECTION ALTER MATERIALIZE PROJECTION ['MATERIALIZE PROJECTION'] TABLE ALTER PROJECTION diff --git a/tests/queries/0_stateless/01293_pretty_max_value_width.sql b/tests/queries/0_stateless/01293_pretty_max_value_width.sql index a8e0f19f58e..f1dc0cd1912 100644 --- a/tests/queries/0_stateless/01293_pretty_max_value_width.sql +++ b/tests/queries/0_stateless/01293_pretty_max_value_width.sql @@ -1,4 +1,5 @@ SET output_format_pretty_color = 1, output_format_pretty_max_value_width_apply_for_single_value = 1, output_format_pretty_row_numbers = 0; +SET output_format_pretty_display_footer_column_names=0; SELECT 'привет' AS x, 'мир' AS y FORMAT Pretty; SET output_format_pretty_max_value_width = 5; diff --git a/tests/queries/0_stateless/01312_comparison_with_constant_string_in_index_analysis.sql b/tests/queries/0_stateless/01312_comparison_with_constant_string_in_index_analysis.sql index b7778dfd780..9fca9b09e1f 100644 --- a/tests/queries/0_stateless/01312_comparison_with_constant_string_in_index_analysis.sql +++ b/tests/queries/0_stateless/01312_comparison_with_constant_string_in_index_analysis.sql @@ -1,3 +1,4 @@ +SET optimize_trivial_insert_select = 1; SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0; DROP TABLE IF EXISTS test; diff --git 
a/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.sql b/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.sql index ca3166142f0..f83d5d96384 100644 --- a/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.sql +++ b/tests/queries/0_stateless/01351_parse_date_time_best_effort_us.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_display_footer_column_names=0; SELECT 'parseDateTimeBestEffortUS'; SELECT diff --git a/tests/queries/0_stateless/01393_benchmark_secure_port.sh b/tests/queries/0_stateless/01393_benchmark_secure_port.sh index 7954e439977..f75577e6ddf 100755 --- a/tests/queries/0_stateless/01393_benchmark_secure_port.sh +++ b/tests/queries/0_stateless/01393_benchmark_secure_port.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-tsan, no-asan +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql b/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql index 09a93d94dc3..f3a664fa3e3 100644 --- a/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql +++ b/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql @@ -3,6 +3,7 @@ SET allow_deprecated_error_prone_window_functions = 1; DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = StripeLog; -- For trivial INSERT SELECT, max_threads is lowered to max_insert_threads and max_block_size is changed to min_insert_block_size_rows. +SET optimize_trivial_insert_select = 1; INSERT INTO t SELECT * FROM numbers_mt(1000000); SET max_threads = 1; -- If data was inserted by more threads, we will probably see data out of order. 
diff --git a/tests/queries/0_stateless/01502_jemalloc_percpu_arena.reference b/tests/queries/0_stateless/01502_jemalloc_percpu_arena.reference index fe093e39a56..5accb577786 100644 --- a/tests/queries/0_stateless/01502_jemalloc_percpu_arena.reference +++ b/tests/queries/0_stateless/01502_jemalloc_percpu_arena.reference @@ -1,5 +1,3 @@ -: Number of CPUs detected is not deterministic. Per-CPU arena disabled. 1 -: Number of CPUs detected is not deterministic. Per-CPU arena disabled. 100000000 1 diff --git a/tests/queries/0_stateless/01502_jemalloc_percpu_arena.sh b/tests/queries/0_stateless/01502_jemalloc_percpu_arena.sh index b3ea6eca3f4..c1bd1e0e1fa 100755 --- a/tests/queries/0_stateless/01502_jemalloc_percpu_arena.sh +++ b/tests/queries/0_stateless/01502_jemalloc_percpu_arena.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-msan, no-ubsan, no-fasttest +# Tags: no-tsan, no-asan, no-msan, no-ubsan, no-fasttest, no-debug # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # NOTE: jemalloc is disabled under sanitizers diff --git a/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh b/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh index c2750ad31b2..35c2b796570 100755 --- a/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh +++ b/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh @@ -14,7 +14,7 @@ for _ in {1..10}; do ${CLICKHOUSE_LOCAL} -q 'select * from numbers_mt(100000000) settings max_threads=100 FORMAT Null' # Binding to specific CPU is not required, but this makes the test more reliable. taskset --cpu-list 0 ${CLICKHOUSE_LOCAL} -q 'select * from numbers_mt(100000000) settings max_threads=100 FORMAT Null' 2>&1 | { - # build with santiziers does not have jemalloc + # build with sanitiziers does not have jemalloc # and for jemalloc we have separate test # 01502_jemalloc_percpu_arena grep -v ': Number of CPUs detected is not deterministic. Per-CPU arena disabled.' 
diff --git a/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql b/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql index 0a09aef7fb2..04d02b8d389 100644 --- a/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql +++ b/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql @@ -1,5 +1,6 @@ SET output_format_pretty_color=1; SET output_format_pretty_row_numbers=0; +SET output_format_pretty_display_footer_column_names=0; SELECT * FROM numbers(10) FORMAT Pretty; SELECT * FROM numbers(10) FORMAT PrettyCompact; SELECT * FROM numbers(10) FORMAT PrettyCompactMonoBlock; diff --git a/tests/queries/0_stateless/01526_client_start_and_exit.expect-not-a-test-case b/tests/queries/0_stateless/01526_client_start_and_exit.expect-not-a-test-case deleted file mode 100755 index 00fb5c4e85b..00000000000 --- a/tests/queries/0_stateless/01526_client_start_and_exit.expect-not-a-test-case +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/expect -f - -log_user 1 -set timeout 5 -match_max 100000 - -spawn bash -c "$env(CLICKHOUSE_CLIENT_BINARY) --no-warnings $env(CLICKHOUSE_CLIENT_OPT)" -expect ":) " -send -- "\4" -expect eof diff --git a/tests/queries/0_stateless/01526_client_start_and_exit.reference b/tests/queries/0_stateless/01526_client_start_and_exit.reference deleted file mode 100644 index e3e2e7b22af..00000000000 --- a/tests/queries/0_stateless/01526_client_start_and_exit.reference +++ /dev/null @@ -1 +0,0 @@ -Loaded 10000 queries. diff --git a/tests/queries/0_stateless/01526_client_start_and_exit.sh b/tests/queries/0_stateless/01526_client_start_and_exit.sh deleted file mode 100755 index 0c5c94e3eac..00000000000 --- a/tests/queries/0_stateless/01526_client_start_and_exit.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -# Create a huge amount of tables, so Suggest will take a time to load -${CLICKHOUSE_CLIENT} -q "SELECT 'CREATE TABLE test_' || hex(randomPrintableASCII(40)) || '(x UInt8) Engine=Memory;' FROM numbers(10000)" --format=TSVRaw | ${CLICKHOUSE_BENCHMARK} -c32 -i 10000 -d 0 2>&1 | grep -F 'Loaded 10000 queries' - -function stress() -{ - # 2004l is ignored because parallel running expect emulated terminal doesn't - # work well with bracketed paste enabling sequence, which is \e033?2004l - # (https://cirw.in/blog/bracketed-paste) - while true; do - "${CURDIR}"/01526_client_start_and_exit.expect-not-a-test-case | grep -v -P 'ClickHouse client|Connecting|Connected|:\) Bye\.|new year|^\s*$|spawn bash|\?2004l|^0\s*$' - done -} - -export CURDIR -export -f stress - -for _ in {1..10}; do - timeout 3 bash -c stress & -done - -wait diff --git a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql index c202ad349d6..95b46c69e83 100644 --- a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql +++ b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql @@ -10,6 +10,7 @@ PARTITION BY key%2 ORDER BY (key, key/2) SETTINGS index_granularity=10, index_granularity_bytes='10Mi'; +SET optimize_trivial_insert_select = 1; INSERT INTO data_01551 SELECT number FROM numbers(100000); SET max_threads=3; SET merge_tree_min_rows_for_concurrent_read=10000; diff --git a/tests/queries/0_stateless/01552_dict_fixedstring.sql b/tests/queries/0_stateless/01552_dict_fixedstring.sql index 01d55656e3c..0b19c9980a4 100644 --- a/tests/queries/0_stateless/01552_dict_fixedstring.sql +++ b/tests/queries/0_stateless/01552_dict_fixedstring.sql @@ -16,5 +16,5 @@ LIFETIME(MIN 10 MAX 10); SELECT dictGet(currentDatabase() || '.dict', 's', number) FROM numbers(2); -DROP TABLE src; DROP DICTIONARY dict; +DROP TABLE src; diff --git 
a/tests/queries/0_stateless/01553_settings_early_apply.sql b/tests/queries/0_stateless/01553_settings_early_apply.sql index 4c168bdb3a5..821e09f6651 100644 --- a/tests/queries/0_stateless/01553_settings_early_apply.sql +++ b/tests/queries/0_stateless/01553_settings_early_apply.sql @@ -1,3 +1,4 @@ +set output_format_pretty_display_footer_column_names=0; set output_format_write_statistics=0; select * from numbers(100) settings max_result_rows = 1; -- { serverError TOO_MANY_ROWS_OR_BYTES } diff --git a/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference index d5bdb816bf2..e69de29bb2d 100644 --- a/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference +++ b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference @@ -1 +0,0 @@ -Unknown data type family: CODEC diff --git a/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sh b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sh deleted file mode 100755 index 8a3242c7036..00000000000 --- a/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -${CLICKHOUSE_CLIENT} --query "CREATE TABLE t (c CODEC(NONE)) ENGINE = Memory" 2>&1 | grep -oF 'Unknown data type family: CODEC' | uniq diff --git a/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sql b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sql new file mode 100644 index 00000000000..ab1cfc89be1 --- /dev/null +++ b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sql @@ -0,0 +1 @@ +CREATE TABLE t (c CODEC(NONE)) ENGINE = Memory -- { clientError SYNTAX_ERROR } \ No newline at end of file diff --git a/tests/queries/0_stateless/01601_accurate_cast.reference b/tests/queries/0_stateless/01601_accurate_cast.reference index 82138e6354a..6a438c49f13 100644 --- a/tests/queries/0_stateless/01601_accurate_cast.reference +++ b/tests/queries/0_stateless/01601_accurate_cast.reference @@ -4,6 +4,11 @@ 5 5 5 +5 +5 +5 +5 +5 1 12 2023-05-30 14:38:20 diff --git a/tests/queries/0_stateless/01601_accurate_cast.sql b/tests/queries/0_stateless/01601_accurate_cast.sql index 471e4e34a4a..3d418b5a36f 100644 --- a/tests/queries/0_stateless/01601_accurate_cast.sql +++ b/tests/queries/0_stateless/01601_accurate_cast.sql @@ -16,6 +16,21 @@ SELECT accurateCast(-129, 'Int8'); -- { serverError CANNOT_CONVERT_TYPE } SELECT accurateCast(5, 'Int8'); SELECT accurateCast(128, 'Int8'); -- { serverError CANNOT_CONVERT_TYPE } +SELECT accurateCast('-1', 'UInt8'); -- { serverError CANNOT_PARSE_TEXT } +SELECT accurateCast('5', 'UInt8'); +SELECT accurateCast('257', 'UInt8'); -- { serverError CANNOT_PARSE_TEXT } +SELECT accurateCast('-1', 'UInt16'); -- { serverError CANNOT_PARSE_TEXT } +SELECT accurateCast('5', 'UInt16'); +SELECT accurateCast('65536', 'UInt16'); -- { serverError CANNOT_PARSE_TEXT } +SELECT accurateCast('-1', 'UInt32'); -- { serverError CANNOT_PARSE_TEXT } +SELECT accurateCast('5', 'UInt32'); +SELECT accurateCast('4294967296', 'UInt32'); -- { serverError CANNOT_PARSE_TEXT } +SELECT accurateCast('-1', 'UInt64'); -- { 
serverError CANNOT_PARSE_TEXT } +SELECT accurateCast('5', 'UInt64'); +SELECT accurateCast('-129', 'Int8'); -- { serverError CANNOT_PARSE_TEXT } +SELECT accurateCast('5', 'Int8'); +SELECT accurateCast('128', 'Int8'); -- { serverError CANNOT_PARSE_TEXT } + SELECT accurateCast(10, 'Decimal32(9)'); -- { serverError DECIMAL_OVERFLOW } SELECT accurateCast(1, 'Decimal32(9)'); SELECT accurateCast(-10, 'Decimal32(9)'); -- { serverError DECIMAL_OVERFLOW } diff --git a/tests/queries/0_stateless/01602_array_aggregation.reference b/tests/queries/0_stateless/01602_array_aggregation.reference index ec8a0838401..bce8ac88c97 100644 --- a/tests/queries/0_stateless/01602_array_aggregation.reference +++ b/tests/queries/0_stateless/01602_array_aggregation.reference @@ -2,6 +2,10 @@ Array min 1 Array max 6 Array sum 21 Array avg 3.5 +Array min : +[1] +Array max : +[3] Table array int min 1 0 diff --git a/tests/queries/0_stateless/01602_array_aggregation.sql b/tests/queries/0_stateless/01602_array_aggregation.sql index 7c0f6eb8267..d8be9eb82f2 100644 --- a/tests/queries/0_stateless/01602_array_aggregation.sql +++ b/tests/queries/0_stateless/01602_array_aggregation.sql @@ -3,6 +3,12 @@ SELECT 'Array max ', (arrayMax(array(1,2,3,4,5,6))); SELECT 'Array sum ', (arraySum(array(1,2,3,4,5,6))); SELECT 'Array avg ', (arrayAvg(array(1,2,3,4,5,6))); +SELECT 'Array min :'; +SELECT arrayMin([[3], [1], [2]]); + +SELECT 'Array max :'; +SELECT arrayMax([[3], [1], [2]]); + DROP TABLE IF EXISTS test_aggregation; CREATE TABLE test_aggregation (x Array(Int)) ENGINE=TinyLog; diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 694e961bc4a..187ff5c37e1 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -24,6 +24,7 @@ enable_vertical_merge_algorithm = 0; SET max_block_size=900; -- There 
are about 900 marks for our settings. +SET optimize_trivial_insert_select = 1; INSERT INTO adaptive_table SELECT number, if(number > 700, randomPrintableASCII(102400), randomPrintableASCII(1)) FROM numbers(10000); OPTIMIZE TABLE adaptive_table FINAL; diff --git a/tests/queries/0_stateless/01623_constraints_column_swap.sql b/tests/queries/0_stateless/01623_constraints_column_swap.sql index 3219ee3cda7..242be87938d 100644 --- a/tests/queries/0_stateless/01623_constraints_column_swap.sql +++ b/tests/queries/0_stateless/01623_constraints_column_swap.sql @@ -5,6 +5,7 @@ SET optimize_using_constraints = 1; SET optimize_move_to_prewhere = 1; SET optimize_substitute_columns = 1; SET optimize_append_index = 1; +SET optimize_trivial_insert_select = 1; DROP TABLE IF EXISTS column_swap_test_test; diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index 1b9755a74d5..a6af1f2170d 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -44,12 +44,12 @@ Filter 9 10 1 > one condition of filter should be pushed down after aggregating, other condition is aliased Filter column -ALIAS notEquals(s, 4) :: 1 -> and(notEquals(y, 0), notEquals(s, 4)) +ALIAS notEquals(s, 4) :: 4 -> and(notEquals(y, 0), notEquals(s, 4)) UInt8 : 2 Aggregating Filter column: notEquals(y, 0) > (analyzer) one condition of filter should be pushed down after aggregating, other condition is aliased Filter column -ALIAS notEquals(__table1.s, 4_UInt8) :: 0 -> and(notEquals(__table1.y, 0_UInt8), notEquals(__table1.s, 4_UInt8)) +ALIAS notEquals(__table1.s, 4_UInt8) :: 1 -> and(notEquals(__table1.y, 0_UInt8), notEquals(__table1.s, 4_UInt8)) Aggregating Filter column: notEquals(__table1.y, 0_UInt8) 0 1 @@ -63,12 +63,12 @@ Filter column: notEquals(__table1.y, 0_UInt8) 9 10 > one condition of filter should be pushed down after aggregating, other 
condition is casted Filter column -FUNCTION and(minus(s, 4) :: 1, 1 :: 3) -> and(notEquals(y, 0), minus(s, 4)) UInt8 : 2 +FUNCTION and(minus(s, 4) :: 5, 1 :: 3) -> and(notEquals(y, 0), minus(s, 4)) Aggregating Filter column: notEquals(y, 0) > (analyzer) one condition of filter should be pushed down after aggregating, other condition is casted Filter column -FUNCTION and(minus(__table1.s, 4_UInt8) :: 0, 1 :: 3) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 4_UInt8)) UInt8 : 2 +FUNCTION and(minus(__table1.s, 4_UInt8) :: 1, 1 :: 3) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 4_UInt8)) Aggregating Filter column: notEquals(__table1.y, 0_UInt8) 0 1 @@ -82,12 +82,12 @@ Filter column: notEquals(__table1.y, 0_UInt8) 9 10 > one condition of filter should be pushed down after aggregating, other two conditions are ANDed Filter column -FUNCTION and(minus(s, 8) :: 1, minus(s, 4) :: 2) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4)) +FUNCTION and(minus(s, 8) :: 5, minus(s, 4) :: 2) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4)) Aggregating Filter column: notEquals(y, 0) > (analyzer) one condition of filter should be pushed down after aggregating, other two conditions are ANDed Filter column -FUNCTION and(minus(__table1.s, 8_UInt8) :: 0, minus(__table1.s, 4_UInt8) :: 2) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 8_UInt8), minus(__table1.s, 4_UInt8)) +FUNCTION and(minus(__table1.s, 8_UInt8) :: 1, minus(__table1.s, 4_UInt8) :: 2) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 8_UInt8), minus(__table1.s, 4_UInt8)) Aggregating Filter column: notEquals(__table1.y, 0_UInt8) 0 1 @@ -100,12 +100,12 @@ Filter column: notEquals(__table1.y, 0_UInt8) 9 10 > two conditions of filter should be pushed down after aggregating and ANDed, one condition is aliased Filter column -ALIAS notEquals(s, 8) :: 1 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4)) +ALIAS notEquals(s, 8) :: 4 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4)) 
Aggregating Filter column: and(notEquals(y, 0), minus(y, 4)) > (analyzer) two conditions of filter should be pushed down after aggregating and ANDed, one condition is aliased Filter column -ALIAS notEquals(__table1.s, 8_UInt8) :: 0 -> and(notEquals(__table1.y, 0_UInt8), notEquals(__table1.s, 8_UInt8), minus(__table1.y, 4_UInt8)) +ALIAS notEquals(__table1.s, 8_UInt8) :: 1 -> and(notEquals(__table1.y, 0_UInt8), notEquals(__table1.s, 8_UInt8), minus(__table1.y, 4_UInt8)) Aggregating Filter column: and(notEquals(__table1.y, 0_UInt8), minus(__table1.y, 4_UInt8)) 0 1 @@ -163,7 +163,6 @@ Filter column: notEquals(__table1.y, 2_UInt8) > filter is pushed down before CreatingSets CreatingSets Filter -Filter 1 3 > one condition of filter is pushed down before LEFT JOIN diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index 864dd69412a..4bd0eb7d908 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -49,14 +49,14 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=0 -q " select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s != 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|ALIAS notEquals(s, 4) :: 1 -> and(notEquals(y, 0), notEquals(s, 4))" + grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|ALIAS notEquals(s, 4) :: 4 -> and(notEquals(y, 0), notEquals(s, 4)) UInt8 : 2" echo "> (analyzer) one condition of filter should be pushed down after aggregating, other condition is aliased" $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -q " explain actions = 1 select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s != 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter 
column\|Filter column: notEquals(__table1.y, 0_UInt8)\|ALIAS notEquals(__table1.s, 4_UInt8) :: 0 -> and(notEquals(__table1.y, 0_UInt8), notEquals(__table1.s, 4_UInt8))" + grep -o "Aggregating\|Filter column\|Filter column: notEquals(__table1.y, 0_UInt8)\|ALIAS notEquals(__table1.s, 4_UInt8) :: 1 -> and(notEquals(__table1.y, 0_UInt8), notEquals(__table1.s, 4_UInt8))" $CLICKHOUSE_CLIENT -q " select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y @@ -69,14 +69,14 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=0 -q " select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 4) :: 1, 1 :: 3) -> and(notEquals(y, 0), minus(s, 4)) UInt8 : 2" + grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 4) :: 5, 1 :: 3) -> and(notEquals(y, 0), minus(s, 4))" echo "> (analyzer) one condition of filter should be pushed down after aggregating, other condition is casted" $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -q " explain actions = 1 select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter column\|Filter column: notEquals(__table1.y, 0_UInt8)\|FUNCTION and(minus(__table1.s, 4_UInt8) :: 0, 1 :: 3) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 4_UInt8)) UInt8 : 2" + grep -o "Aggregating\|Filter column\|Filter column: notEquals(__table1.y, 0_UInt8)\|FUNCTION and(minus(__table1.s, 4_UInt8) :: 1, 1 :: 3) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 4_UInt8))" $CLICKHOUSE_CLIENT -q " select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y @@ 
-89,14 +89,14 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=0 --convert_query_to_cnf=0 -q " select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 8 and s - 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 8) :: 1, minus(s, 4) :: 2) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4))" + grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 8) :: 5, minus(s, 4) :: 2) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4))" echo "> (analyzer) one condition of filter should be pushed down after aggregating, other two conditions are ANDed" $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 --convert_query_to_cnf=0 -q " explain actions = 1 select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 8 and s - 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter column\|Filter column: notEquals(__table1.y, 0_UInt8)\|FUNCTION and(minus(__table1.s, 8_UInt8) :: 0, minus(__table1.s, 4_UInt8) :: 2) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 8_UInt8), minus(__table1.s, 4_UInt8))" + grep -o "Aggregating\|Filter column\|Filter column: notEquals(__table1.y, 0_UInt8)\|FUNCTION and(minus(__table1.s, 8_UInt8) :: 1, minus(__table1.s, 4_UInt8) :: 2) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 8_UInt8), minus(__table1.s, 4_UInt8))" $CLICKHOUSE_CLIENT -q " select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y @@ -109,14 +109,14 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=0 --convert_query_to_cnf=0 -q " select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s != 8 and y - 4 settings enable_optimize_predicate_expression=0" | - grep -o 
"Aggregating\|Filter column\|Filter column: and(notEquals(y, 0), minus(y, 4))\|ALIAS notEquals(s, 8) :: 1 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4))" + grep -o "Aggregating\|Filter column\|Filter column: and(notEquals(y, 0), minus(y, 4))\|ALIAS notEquals(s, 8) :: 4 -> and(notEquals(y, 0), notEquals(s, 8), minus(y, 4))" echo "> (analyzer) two conditions of filter should be pushed down after aggregating and ANDed, one condition is aliased" $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 --convert_query_to_cnf=0 -q " explain actions = 1 select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s != 8 and y - 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter column\|Filter column: and(notEquals(__table1.y, 0_UInt8), minus(__table1.y, 4_UInt8))\|ALIAS notEquals(__table1.s, 8_UInt8) :: 0 -> and(notEquals(__table1.y, 0_UInt8), notEquals(__table1.s, 8_UInt8), minus(__table1.y, 4_UInt8))" + grep -o "Aggregating\|Filter column\|Filter column: and(notEquals(__table1.y, 0_UInt8), minus(__table1.y, 4_UInt8))\|ALIAS notEquals(__table1.s, 8_UInt8) :: 1 -> and(notEquals(__table1.y, 0_UInt8), notEquals(__table1.s, 8_UInt8), minus(__table1.y, 4_UInt8))" $CLICKHOUSE_CLIENT -q " select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y diff --git a/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.reference b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.reference new file mode 100644 index 00000000000..4905c7f8a71 --- /dev/null +++ b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.reference @@ -0,0 +1,10 @@ + Filter (((WHERE + (Change column names to column identifiers + (Project names + Projection))) + HAVING)) + Filter column: and(notEquals(sum(__table2.number), 0_UInt8), equals(__table1.key, 7_UInt8)) (removed) + Aggregating + Filter (( + (Before GROUP BY + 
Change column names to column identifiers))) + Filter column: equals(__table1.key, 7_UInt8) (removed) + Filter (((WHERE + (Projection + Before ORDER BY)) + HAVING)) + Filter column: and(notEquals(sum(number), 0), equals(key, 7)) (removed) + Aggregating + Filter ((( + Before GROUP BY) + WHERE)) + Filter column: and(equals(bitAnd(number, 15), 7), equals(key, 7)) (removed) diff --git a/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql new file mode 100644 index 00000000000..1301135b4cb --- /dev/null +++ b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql @@ -0,0 +1,5 @@ +set allow_experimental_analyzer=1; +select explain from (explain actions = 1 select * from (select sum(number) as v, bitAnd(number, 15) as key from numbers(1e8) group by key having v != 0) where key = 7) where explain like '%Filter%' or explain like '%Aggregating%'; + +set allow_experimental_analyzer=0; +select explain from (explain actions = 1 select * from (select sum(number) as v, bitAnd(number, 15) as key from numbers(1e8) group by key having v != 0) where key = 7) where explain like '%Filter%' or explain like '%Aggregating%'; diff --git a/tests/queries/0_stateless/01676_dictget_in_default_expression.sql b/tests/queries/0_stateless/01676_dictget_in_default_expression.sql index 54e46a2b718..db23ae1919c 100644 --- a/tests/queries/0_stateless/01676_dictget_in_default_expression.sql +++ b/tests/queries/0_stateless/01676_dictget_in_default_expression.sql @@ -22,7 +22,8 @@ DETACH DATABASE test_01676; ATTACH DATABASE test_01676; SELECT 'status_after_detach_and_attach:'; -SELECT status FROM system.dictionaries WHERE database='test_01676' AND name='dict'; +-- It can be not loaded, or not even finish attaching in case of asynchronous tables loading. 
+SELECT COALESCE((SELECT status FROM system.dictionaries WHERE database='test_01676' AND name='dict')::Nullable(String), 'NOT_LOADED'); INSERT INTO test_01676.table (x) VALUES (toInt64(4)); SELECT * FROM test_01676.table ORDER BY x; diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.reference b/tests/queries/0_stateless/01683_text_log_deadlock.reference index 4cf61460252..3805f2a95e9 100644 --- a/tests/queries/0_stateless/01683_text_log_deadlock.reference +++ b/tests/queries/0_stateless/01683_text_log_deadlock.reference @@ -1 +1 @@ -queries: 25000 +queries: 5000 diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.sh b/tests/queries/0_stateless/01683_text_log_deadlock.sh index 1aced61cb42..6b3bcc58868 100755 --- a/tests/queries/0_stateless/01683_text_log_deadlock.sh +++ b/tests/queries/0_stateless/01683_text_log_deadlock.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash -# Tags: deadlock, no-tsan, no-asan +# Tags: deadlock CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_BENCHMARK --secure -i 25000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries: 25000' +$CLICKHOUSE_BENCHMARK --secure -i 5000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries: 5000' diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh index 9284348dd62..d2dcd501428 100755 --- a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh +++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh @@ -7,6 +7,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -q "create table insert_big_json(a String, b String) engine=MergeTree() order by tuple()"; -python3 -c "[print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000)) for i in range(10)]; [print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000)) for i in range(10)]" 2>/dev/null | ${CLICKHOUSE_CLIENT} --min_chunk_bytes_for_parallel_parsing=10485760 --max_threads=0 --input_format_parallel_parsing=1 --max_memory_usage=0 -q "insert into insert_big_json FORMAT JSONEachRow" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||: +python3 -c "[print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000)) for i in range(10)]; [print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000)) for i in range(10)]" 2>/dev/null | ${CLICKHOUSE_CLIENT} --min_chunk_bytes_for_parallel_parsing=10485760 --max_threads=0 --input_format_parallel_parsing=1 --max_memory_usage=0 --max_parsing_threads=2 -q "insert into insert_big_json FORMAT JSONEachRow" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." 
|| echo "FAIL" ||: ${CLICKHOUSE_CLIENT} -q "drop table insert_big_json" diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.sql b/tests/queries/0_stateless/01710_minmax_count_projection.sql index d0177da84d2..6c598bce440 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.sql +++ b/tests/queries/0_stateless/01710_minmax_count_projection.sql @@ -16,7 +16,7 @@ select min(i), max(i), count() from d where _partition_value.1 = 10 group by _pa select min(i) from d where 1 = _partition_value.1; -- fuzz crash https://github.com/ClickHouse/ClickHouse/issues/37151 -SELECT min(i), max(i), count() FROM d WHERE (_partition_value.1) = 0 GROUP BY ignore(bitTest(ignore(NULL), 65535), NULL, (_partition_value.1) = 7, '10.25', bitTest(NULL, -9223372036854775808), NULL, ignore(ignore(-2147483647, NULL)), 1024), _partition_id ORDER BY _partition_id ASC NULLS FIRST; +SELECT min(i), max(i), count() FROM d WHERE (_partition_value.1) = 0 GROUP BY ignore(bitTest(ignore(NULL), 0), NULL, (_partition_value.1) = 7, '10.25', bitTest(NULL, 0), NULL, ignore(ignore(-2147483647, NULL)), 1024), _partition_id ORDER BY _partition_id ASC NULLS FIRST; drop table d; diff --git a/tests/queries/0_stateless/01710_projection_pk_trivial_count.reference b/tests/queries/0_stateless/01710_projection_pk_trivial_count.reference new file mode 100644 index 00000000000..43316772467 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_pk_trivial_count.reference @@ -0,0 +1,3 @@ + ReadFromMergeTree (default.x) + ReadFromPreparedSource (Optimized trivial count) +5 diff --git a/tests/queries/0_stateless/01710_projection_pk_trivial_count.sql b/tests/queries/0_stateless/01710_projection_pk_trivial_count.sql new file mode 100644 index 00000000000..ce9eadf06b2 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_pk_trivial_count.sql @@ -0,0 +1,8 @@ +drop table if exists x; + +create table x (i int) engine MergeTree order by i settings index_granularity = 3; +insert into x 
select * from numbers(10); +select * from (explain select count() from x where (i >= 3 and i <= 6) or i = 7) where explain like '%ReadFromPreparedSource%' or explain like '%ReadFromMergeTree%'; +select count() from x where (i >= 3 and i <= 6) or i = 7; + +drop table x; diff --git a/tests/queries/0_stateless/01760_ddl_dictionary_use_current_database_name.sql b/tests/queries/0_stateless/01760_ddl_dictionary_use_current_database_name.sql index 55c0d1e3678..a7f04921f1f 100644 --- a/tests/queries/0_stateless/01760_ddl_dictionary_use_current_database_name.sql +++ b/tests/queries/0_stateless/01760_ddl_dictionary_use_current_database_name.sql @@ -27,5 +27,5 @@ SELECT dictGet('ddl_dictionary_test', 'value', number) FROM system.numbers LIMIT SELECT 'dictHas'; SELECT dictHas('ddl_dictionary_test', number) FROM system.numbers LIMIT 3; -DROP TABLE ddl_dictonary_test_source; DROP DICTIONARY ddl_dictionary_test; +DROP TABLE ddl_dictonary_test_source; diff --git a/tests/queries/0_stateless/01760_system_dictionaries.sql b/tests/queries/0_stateless/01760_system_dictionaries.sql index a5609281e49..2e7d4184811 100644 --- a/tests/queries/0_stateless/01760_system_dictionaries.sql +++ b/tests/queries/0_stateless/01760_system_dictionaries.sql @@ -25,8 +25,8 @@ SELECT * FROM 01760_db.example_simple_key_dictionary; SELECT name, database, key.names, key.types, attribute.names, attribute.types, status FROM system.dictionaries WHERE database='01760_db'; -DROP TABLE 01760_db.example_simple_key_source; DROP DICTIONARY 01760_db.example_simple_key_dictionary; +DROP TABLE 01760_db.example_simple_key_source; SELECT name, database, key.names, key.types, attribute.names, attribute.types, status FROM system.dictionaries WHERE database='01760_db'; @@ -53,7 +53,7 @@ SELECT * FROM 01760_db.example_complex_key_dictionary; SELECT name, database, key.names, key.types, attribute.names, attribute.types, status FROM system.dictionaries WHERE database='01760_db'; -DROP TABLE 01760_db.example_complex_key_source; 
DROP DICTIONARY 01760_db.example_complex_key_dictionary; +DROP TABLE 01760_db.example_complex_key_source; DROP DATABASE 01760_db; diff --git a/tests/queries/0_stateless/01763_max_distributed_depth.sql b/tests/queries/0_stateless/01763_max_distributed_depth.sql index 08dc533876d..f722a88226d 100644 --- a/tests/queries/0_stateless/01763_max_distributed_depth.sql +++ b/tests/queries/0_stateless/01763_max_distributed_depth.sql @@ -17,19 +17,6 @@ ENGINE = Distributed('test_shard_localhost', '', 'tt7', rand()); DROP TABLE IF EXISTS tt7; -CREATE TABLE tt7 as tt6 ENGINE = Distributed('test_shard_localhost', '', 'tt6', rand()); - -INSERT INTO tt6 VALUES (1, 1, 1, 1, 'ok'); -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } - -SELECT * FROM tt6; -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } - -SET max_distributed_depth = 0; - --- stack overflow -INSERT INTO tt6 VALUES (1, 1, 1, 1, 'ok'); -- { serverError TOO_DEEP_RECURSION} - --- stack overflow -SELECT * FROM tt6; -- { serverError TOO_DEEP_RECURSION } +CREATE TABLE tt7 as tt6 ENGINE = Distributed('test_shard_localhost', '', 'tt6', rand()); -- {serverError INFINITE_LOOP} DROP TABLE tt6; -DROP TABLE tt7; diff --git a/tests/queries/0_stateless/01764_table_function_dictionary.sql b/tests/queries/0_stateless/01764_table_function_dictionary.sql index b642fdd741e..76e7213b367 100644 --- a/tests/queries/0_stateless/01764_table_function_dictionary.sql +++ b/tests/queries/0_stateless/01764_table_function_dictionary.sql @@ -23,5 +23,5 @@ LAYOUT(DIRECT()); SELECT * FROM dictionary('table_function_dictionary_test_dictionary'); -DROP TABLE table_function_dictionary_source_table; DROP DICTIONARY table_function_dictionary_test_dictionary; +DROP TABLE table_function_dictionary_source_table; diff --git a/tests/queries/0_stateless/01780_column_sparse_distinct.sql b/tests/queries/0_stateless/01780_column_sparse_distinct.sql index e98bada1aac..a0735e38f18 100644 --- a/tests/queries/0_stateless/01780_column_sparse_distinct.sql +++ 
b/tests/queries/0_stateless/01780_column_sparse_distinct.sql @@ -1,3 +1,5 @@ +SET optimize_trivial_insert_select = 1; + DROP TABLE IF EXISTS t_sparse_distinct; CREATE TABLE t_sparse_distinct (id UInt32, v UInt64) diff --git a/tests/queries/0_stateless/01780_column_sparse_filter.sql b/tests/queries/0_stateless/01780_column_sparse_filter.sql index f52beba50b0..245c7c121b7 100644 --- a/tests/queries/0_stateless/01780_column_sparse_filter.sql +++ b/tests/queries/0_stateless/01780_column_sparse_filter.sql @@ -1,3 +1,5 @@ +SET optimize_trivial_insert_select = 1; + DROP TABLE IF EXISTS t_sparse; CREATE TABLE t_sparse (id UInt64, u UInt64, s String) diff --git a/tests/queries/0_stateless/01804_dictionary_decimal256_type.sql b/tests/queries/0_stateless/01804_dictionary_decimal256_type.sql index 77e9abfb742..08a8d0feb27 100644 --- a/tests/queries/0_stateless/01804_dictionary_decimal256_type.sql +++ b/tests/queries/0_stateless/01804_dictionary_decimal256_type.sql @@ -25,6 +25,8 @@ LAYOUT(FLAT()); SELECT 'Flat dictionary'; SELECT dictGet('flat_dictionary', 'decimal_value', toUInt64(1)); +DROP DICTIONARY flat_dictionary; + DROP DICTIONARY IF EXISTS hashed_dictionary; CREATE DICTIONARY hashed_dictionary ( diff --git a/tests/queries/0_stateless/01822_union_and_constans_error.reference b/tests/queries/0_stateless/01822_union_and_constans_error.reference index d00491fd7e5..e69de29bb2d 100644 --- a/tests/queries/0_stateless/01822_union_and_constans_error.reference +++ b/tests/queries/0_stateless/01822_union_and_constans_error.reference @@ -1 +0,0 @@ -1 diff --git a/tests/queries/0_stateless/01822_union_and_constans_error.sql b/tests/queries/0_stateless/01822_union_and_constans_error.sql index 38b7df700cd..9017e8769eb 100644 --- a/tests/queries/0_stateless/01822_union_and_constans_error.sql +++ b/tests/queries/0_stateless/01822_union_and_constans_error.sql @@ -15,6 +15,6 @@ SELECT isNull(t0.c0) OR COUNT('\n?pVa') FROM t0 GROUP BY t0.c0 HAVING isNull(isNull(t0.c0)) -SETTINGS 
aggregate_functions_null_for_empty = 1, enable_optimize_predicate_expression = 0; +SETTINGS aggregate_functions_null_for_empty = 1, enable_optimize_predicate_expression = 0 format Null; drop table if exists t0; diff --git a/tests/queries/0_stateless/01825_type_json_from_map.sql b/tests/queries/0_stateless/01825_type_json_from_map.sql index 7cad50b363b..817d099c2d5 100644 --- a/tests/queries/0_stateless/01825_type_json_from_map.sql +++ b/tests/queries/0_stateless/01825_type_json_from_map.sql @@ -1,10 +1,11 @@ --- Tags: no-fasttest, no-random-merge-tree-settings +-- Tags: no-fasttest, no-random-settings, no-random-merge-tree-settings -- For example, it is 4 times slower with --merge_max_block_size=5967 --index_granularity=55 --min_bytes_for_wide_part=847510133 DROP TABLE IF EXISTS t_json; DROP TABLE IF EXISTS t_map; SET allow_experimental_object_type = 1; +SET optimize_trivial_insert_select = 1; CREATE TABLE t_json(id UInt64, obj JSON) ENGINE = MergeTree ORDER BY id; CREATE TABLE t_map(id UInt64, m Map(String, UInt64)) ENGINE = MergeTree ORDER BY id; diff --git a/tests/queries/0_stateless/01825_type_json_sparse.sql b/tests/queries/0_stateless/01825_type_json_sparse.sql index cc7c66382a3..69ca1ff8406 100644 --- a/tests/queries/0_stateless/01825_type_json_sparse.sql +++ b/tests/queries/0_stateless/01825_type_json_sparse.sql @@ -3,6 +3,7 @@ DROP TABLE IF EXISTS t_json_sparse; SET allow_experimental_object_type = 1; +SET optimize_trivial_insert_select = 1; CREATE TABLE t_json_sparse (data JSON) ENGINE = MergeTree ORDER BY tuple() diff --git a/tests/queries/0_stateless/01852_dictionary_found_rate_long.sql b/tests/queries/0_stateless/01852_dictionary_found_rate_long.sql index d5108e98510..da364403893 100644 --- a/tests/queries/0_stateless/01852_dictionary_found_rate_long.sql +++ b/tests/queries/0_stateless/01852_dictionary_found_rate_long.sql @@ -310,6 +310,6 @@ SELECT name, found_rate FROM system.dictionaries WHERE database = currentDatabas SELECT tuple(x, y) as key, 
dictGet('polygon_dictionary_01862', 'name', key) FROM points_01862 FORMAT Null; SELECT name, found_rate FROM system.dictionaries WHERE database = currentDatabase() AND name = 'polygon_dictionary_01862'; +DROP DICTIONARY polygon_dictionary_01862; DROP TABLE polygons_01862; DROP TABLE points_01862; -DROP DICTIONARY polygon_dictionary_01862; diff --git a/tests/queries/0_stateless/01904_dictionary_default_nullable_type.sql b/tests/queries/0_stateless/01904_dictionary_default_nullable_type.sql index 4c623941a19..d28f9e5c4e6 100644 --- a/tests/queries/0_stateless/01904_dictionary_default_nullable_type.sql +++ b/tests/queries/0_stateless/01904_dictionary_default_nullable_type.sql @@ -111,6 +111,8 @@ LAYOUT(IP_TRIE()); SELECT 'IPTrie dictionary'; SELECT dictGet('ip_trie_dictionary', 'value', tuple(IPv4StringToNum('127.0.0.0'))); --{serverError UNSUPPORTED_METHOD} +DROP DICTIONARY ip_trie_dictionary; + DROP TABLE dictionary_nullable_source_table; DROP TABLE dictionary_nullable_default_source_table; diff --git a/tests/queries/0_stateless/01910_view_dictionary.sql b/tests/queries/0_stateless/01910_view_dictionary.sql index 1f9928735b4..05a67889825 100644 --- a/tests/queries/0_stateless/01910_view_dictionary.sql +++ b/tests/queries/0_stateless/01910_view_dictionary.sql @@ -45,5 +45,5 @@ FROM numbers(3); DROP TABLE dictionary_source_en; DROP TABLE dictionary_source_ru; -DROP TABLE dictionary_source_view; DROP DICTIONARY flat_dictionary; +DROP TABLE dictionary_source_view; diff --git a/tests/queries/0_stateless/01921_test_progress_bar.py b/tests/queries/0_stateless/01921_test_progress_bar.py index 54c7ae59894..6406534a647 100755 --- a/tests/queries/0_stateless/01921_test_progress_bar.py +++ b/tests/queries/0_stateless/01921_test_progress_bar.py @@ -15,6 +15,6 @@ log = None with client(name="client1>", log=log) as client1: client1.expect(prompt) client1.send("SELECT number FROM numbers(1000) FORMAT Null") - client1.expect("Progress: 1\.00 thousand rows, 8\.00 KB .*" + 
end_of_block) - client1.expect("0 rows in set. Elapsed: [\\w]{1}\.[\\w]{3} sec.") + client1.expect("Progress: 1\\.00 thousand rows, 8\\.00 KB .*" + end_of_block) + client1.expect("0 rows in set. Elapsed: [\\w]{1}\\.[\\w]{3} sec.") client1.expect("Peak memory usage: .*B" + end_of_block) diff --git a/tests/queries/0_stateless/01942_dateTimeToSnowflake.sql b/tests/queries/0_stateless/01942_dateTimeToSnowflake.sql index 1090179bb67..6cce4863c15 100644 --- a/tests/queries/0_stateless/01942_dateTimeToSnowflake.sql +++ b/tests/queries/0_stateless/01942_dateTimeToSnowflake.sql @@ -1,3 +1,4 @@ +SET allow_deprecated_snowflake_conversion_functions = 1; -- Force-enable deprecated snowflake conversion functions (in case this is randomized in CI) SET session_timezone = 'Africa/Juba'; -- Error cases @@ -10,6 +11,9 @@ SELECT dateTime64ToSnowflake('abc'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} SELECT dateTimeToSnowflake('abc', 123); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} SELECT dateTime64ToSnowflake('abc', 123); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT dateTimeToSnowflake(now()) SETTINGS allow_deprecated_snowflake_conversion_functions = 0; -- { serverError DEPRECATED_FUNCTION } +SELECT dateTime64ToSnowflake(now64()) SETTINGS allow_deprecated_snowflake_conversion_functions = 0; -- { serverError DEPRECATED_FUNCTION } + SELECT '-- const / non-const inputs'; WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt diff --git a/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference new file mode 100644 index 00000000000..5dcd0c9dfcd --- /dev/null +++ b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference @@ -0,0 +1,32 @@ +-- Negative tests +-- Return type +UInt64 +UInt64 +-- Standard and twitter epoch +Row 1: +────── +dt: 2021-08-15 18:57:56 +dt64: 2021-08-15 18:57:56.492 +dateTimeToSnowflakeID(dt): 6832747188322304000 +dateTime64ToSnowflakeID(dt64): 
6832747190385901568 +dateTimeToSnowflakeID(dt, twitter_epoch): 1426981498778550272 +dateTime64ToSnowflakeID(dt64, twitter_epoch): 1426981500842147840 +-- Different DateTime64 scales +Row 1: +────── +dateTime64ToSnowflakeID(dt64_0): 6832747188322304000 +dateTime64ToSnowflakeID(dt64_1): 6832747190000025600 +dateTime64ToSnowflakeID(dt64_2): 6832747190377512960 +dateTime64ToSnowflakeID(dt64_3): 6832747190385901568 +dateTime64ToSnowflakeID(dt64_4): 6832747190385901568 +-- Idempotency +Row 1: +────── +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_0), 0, 'UTC'), dt64_0): 1 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_1), 0, 'UTC'), dt64_1): 1 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_2), 0, 'UTC'), dt64_2): 1 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_3), 0, 'UTC'), dt64_3): 1 +Row 1: +────── +dt64_4: 2023-11-11 11:11:11.1231 +snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_4)): 2023-11-11 11:11:11.123 diff --git a/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql new file mode 100644 index 00000000000..945b399157f --- /dev/null +++ b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql @@ -0,0 +1,74 @@ +SET session_timezone = 'UTC'; -- disable timezone randomization +SET allow_experimental_analyzer = 1; -- The old path formats the result with different whitespaces + +SELECT '-- Negative tests'; +SELECT dateTimeToSnowflakeID(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT dateTime64ToSnowflakeID(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT dateTimeToSnowflakeID('invalid_dt'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT dateTime64ToSnowflakeID('invalid_dt'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT dateTimeToSnowflakeID(now(), 'invalid_epoch'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT dateTime64ToSnowflakeID(now64(), 'invalid_epoch'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} 
+SELECT dateTimeToSnowflakeID(now(), 42, 'too_many_args'); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT dateTime64ToSnowflakeID(now64(), 42, 'too_many_args'); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} + +SELECT '-- Return type'; +SELECT toTypeName(dateTimeToSnowflakeID(now())); +SELECT toTypeName(dateTime64ToSnowflakeID(now64())); + +SELECT '-- Standard and twitter epoch'; + +WITH + toDateTime('2021-08-15 18:57:56') AS dt, + toDateTime64('2021-08-15 18:57:56.492', 3) AS dt64, + 1288834974657 AS twitter_epoch +SELECT + dt, + dt64, + dateTimeToSnowflakeID(dt), + dateTime64ToSnowflakeID(dt64), + dateTimeToSnowflakeID(dt, twitter_epoch), + dateTime64ToSnowflakeID(dt64, twitter_epoch) +FORMAT + Vertical; + +SELECT '-- Different DateTime64 scales'; + +WITH + toDateTime64('2021-08-15 18:57:56.492', 0, 'UTC') AS dt64_0, + toDateTime64('2021-08-15 18:57:56.492', 1, 'UTC') AS dt64_1, + toDateTime64('2021-08-15 18:57:56.492', 2, 'UTC') AS dt64_2, + toDateTime64('2021-08-15 18:57:56.492', 3, 'UTC') AS dt64_3, + toDateTime64('2021-08-15 18:57:56.492', 4, 'UTC') AS dt64_4 +SELECT + dateTime64ToSnowflakeID(dt64_0), + dateTime64ToSnowflakeID(dt64_1), + dateTime64ToSnowflakeID(dt64_2), + dateTime64ToSnowflakeID(dt64_3), + dateTime64ToSnowflakeID(dt64_4) +Format + Vertical; + +SELECT '-- Idempotency'; + + -- DateTime64-to-SnowflakeID-to-DateTime64 is idempotent if the scale is <=3 (millisecond precision) +WITH + now64(0) AS dt64_0, + now64(1) AS dt64_1, + now64(2) AS dt64_2, + now64(3) AS dt64_3 +SELECT + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_0), 0, 'UTC') == dt64_0, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_1), 0, 'UTC') == dt64_1, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_2), 0, 'UTC') == dt64_2, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_3), 0, 'UTC') == dt64_3 +FORMAT + Vertical; + +-- not idempotent +WITH + toDateTime64('2023-11-11 11:11:11.1231', 4, 'UTC') AS dt64_4 +SELECT + dt64_4, + 
snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_4)) +FORMAT + Vertical; diff --git a/tests/queries/0_stateless/01942_snowflakeIDToDateTime.reference b/tests/queries/0_stateless/01942_snowflakeIDToDateTime.reference new file mode 100644 index 00000000000..9ed8c1dd3e5 --- /dev/null +++ b/tests/queries/0_stateless/01942_snowflakeIDToDateTime.reference @@ -0,0 +1,27 @@ +-- Negative tests +-- Return type +DateTime +DateTime64(3) +-- Non-const path +Row 1: +────── +sf: 7204436857747984384 +dt: 2024-06-06 10:59:58 +dt64: 2024-06-06 10:59:58.851 +Row 1: +────── +sf: 1426981498778550272 +dt: 2021-08-15 18:57:56 +dt64: 2021-08-15 18:57:56.000 +Row 1: +────── +sf: 7204436857747984384 +dt: 2024-06-06 18:59:58 +dt64: 2024-06-06 18:59:58.851 +-- Const path +Row 1: +────── +sf: 7204436857747984384 +dt: 2024-06-06 10:59:58 +dt64: 2024-06-06 10:59:58.851 +-- Can be combined with generateSnowflakeID diff --git a/tests/queries/0_stateless/01942_snowflakeIDToDateTime.sql b/tests/queries/0_stateless/01942_snowflakeIDToDateTime.sql new file mode 100644 index 00000000000..48316691c71 --- /dev/null +++ b/tests/queries/0_stateless/01942_snowflakeIDToDateTime.sql @@ -0,0 +1,82 @@ +SET session_timezone = 'UTC'; -- disable timezone randomization +SET allow_experimental_analyzer = 1; -- The old path formats the result with different whitespaces + +SELECT '-- Negative tests'; +SELECT snowflakeIDToDateTime(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT snowflakeIDToDateTime64(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT snowflakeIDToDateTime('invalid_snowflake'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime64('invalid_snowflake'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime(123::UInt64, 'invalid_epoch'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime64(123::UInt64, 'invalid_epoch'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime(123::UInt64, 
materialize(42)); -- {serverError ILLEGAL_COLUMN} +SELECT snowflakeIDToDateTime64(123::UInt64, materialize(42)); -- {serverError ILLEGAL_COLUMN} +SELECT snowflakeIDToDateTime(123::UInt64, 42, 42); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime64(123::UInt64, 42, 42); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime(123::UInt64, 42, 'UTC', 'too_many_args'); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT snowflakeIDToDateTime64(123::UInt64, 42, 'UTC', 'too_many_args'); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} + +SELECT '-- Return type'; +SELECT toTypeName(snowflakeIDToDateTime(123::UInt64)); +SELECT toTypeName(snowflakeIDToDateTime64(123::UInt64)); + +SELECT '-- Non-const path'; +-- Two const arguments are mapped to two non-const arguments ('getDefaultImplementationForConstants'), the non-const path is taken + +WITH + 7204436857747984384 AS sf +SELECT + sf, + snowflakeIDToDateTime(sf) as dt, + snowflakeIDToDateTime64(sf) as dt64 +FORMAT + Vertical; + +-- With Twitter Snowflake ID and Twitter epoch +WITH + 1426981498778550272 AS sf, + 1288834974657 AS epoch +SELECT + sf, + snowflakeIDToDateTime(sf, epoch) as dt, + snowflakeIDToDateTime64(sf, epoch) as dt64 +FORMAT + Vertical; + +-- non-default timezone +WITH + 7204436857747984384 AS sf, + 0 AS epoch, -- default epoch + 'Asia/Shanghai' AS tz +SELECT + sf, + snowflakeIDToDateTime(sf, epoch, tz) as dt, + snowflakeIDToDateTime64(sf, epoch, tz) as dt64 +FORMAT + Vertical; + +SELECT '-- Const path'; + +-- The const path can only be tested by const snowflake + const epoch + non-const time-zone. The latter requires a special setting. 
+WITH + 7204436857747984384 AS sf, + 0 AS epoch, -- default epoch + materialize('Asia/Shanghai') AS tz +SELECT + sf, + snowflakeIDToDateTime(sf, epoch, tz) as dt, + snowflakeIDToDateTime64(sf, epoch, tz) as dt64 +FORMAT + Vertical +SETTINGS + allow_nonconst_timezone_arguments = 1; + + +SELECT '-- Can be combined with generateSnowflakeID'; + +WITH + generateSnowflakeID() AS snowflake +SELECT + snowflakeIDToDateTime(snowflake), + snowflakeIDToDateTime64(snowflake) +FORMAT + Null; diff --git a/tests/queries/0_stateless/01942_snowflakeToDateTime.sql b/tests/queries/0_stateless/01942_snowflakeToDateTime.sql index f1a50dd370d..34fe15ec187 100644 --- a/tests/queries/0_stateless/01942_snowflakeToDateTime.sql +++ b/tests/queries/0_stateless/01942_snowflakeToDateTime.sql @@ -1,4 +1,6 @@ --- -- Error cases +SET allow_deprecated_snowflake_conversion_functions = 1; -- Force-enable deprecated snowflake conversion functions (in case this is randomized in CI) + +-- Error cases SELECT snowflakeToDateTime(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} SELECT snowflakeToDateTime64(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} @@ -8,32 +10,35 @@ SELECT snowflakeToDateTime64('abc'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} SELECT snowflakeToDateTime('abc', 123); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} SELECT snowflakeToDateTime64('abc', 123); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeToDateTime(123::Int64) SETTINGS allow_deprecated_snowflake_conversion_functions = 0; -- { serverError DEPRECATED_FUNCTION } +SELECT snowflakeToDateTime64(123::Int64) SETTINGS allow_deprecated_snowflake_conversion_functions = 0; -- { serverError DEPRECATED_FUNCTION } + SELECT 'const column'; WITH - CAST(1426860704886947840 AS Int64) AS i64, - 'UTC' AS tz + CAST(1426860704886947840 AS Int64) AS i64, + 'UTC' AS tz SELECT - tz, - i64, - snowflakeToDateTime(i64, tz) as dt, - toTypeName(dt), - snowflakeToDateTime64(i64, tz) as dt64, - toTypeName(dt64); + tz, + i64, + 
snowflakeToDateTime(i64, tz) as dt, + toTypeName(dt), + snowflakeToDateTime64(i64, tz) as dt64, + toTypeName(dt64); WITH - CAST(1426860704886947840 AS Int64) AS i64, - 'Asia/Shanghai' AS tz + CAST(1426860704886947840 AS Int64) AS i64, + 'Asia/Shanghai' AS tz SELECT - tz, - i64, - snowflakeToDateTime(i64, tz) as dt, - toTypeName(dt), - snowflakeToDateTime64(i64, tz) as dt64, - toTypeName(dt64); + tz, + i64, + snowflakeToDateTime(i64, tz) as dt, + toTypeName(dt), + snowflakeToDateTime64(i64, tz) as dt64, + toTypeName(dt64); DROP TABLE IF EXISTS tab; -CREATE TABLE tab(val Int64, tz String) engine=Log; +CREATE TABLE tab(val Int64, tz String) engine = Log; INSERT INTO tab VALUES (42, 'Asia/Singapore'); SELECT 1 FROM tab WHERE snowflakeToDateTime(42::Int64, tz) != now() SETTINGS allow_nonconst_timezone_arguments = 1; diff --git a/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql b/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql index 72cac481376..ea2dad5c732 100644 --- a/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql +++ b/tests/queries/0_stateless/02008_complex_key_range_hashed_dictionary.sql @@ -53,8 +53,8 @@ SELECT CountryID, StartDate, Tax FROM range_dictionary ORDER BY CountryID, Start SELECT 'onlySpecificColumn'; SELECT Tax FROM range_dictionary ORDER BY CountryID, StartDate, EndDate; -DROP TABLE date_table; DROP DICTIONARY range_dictionary; +DROP TABLE date_table; CREATE TABLE date_table ( @@ -107,5 +107,5 @@ SELECT CountryID, StartDate, Tax FROM range_dictionary_nullable ORDER BY Country SELECT 'onlySpecificColumn'; SELECT Tax FROM range_dictionary_nullable ORDER BY CountryID, StartDate, EndDate; -DROP TABLE date_table; DROP DICTIONARY range_dictionary_nullable; +DROP TABLE date_table; diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index d625feb94d3..8f62eda9233 100644 --- 
a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -4,7 +4,7 @@ CREATE TABLE system.aggregate_function_combinators `is_internal` UInt8 ) ENGINE = SystemAggregateFunctionCombinators -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all available aggregate function combinators, which could be applied to aggregate functions and change the way they work.' CREATE TABLE system.asynchronous_inserts ( `query` String, @@ -17,7 +17,7 @@ CREATE TABLE system.asynchronous_inserts `entries.bytes` Array(UInt64) ) ENGINE = SystemAsynchronousInserts -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information about pending asynchronous inserts in queue in server\'s memory.' CREATE TABLE system.asynchronous_metrics ( `metric` String, @@ -25,14 +25,14 @@ CREATE TABLE system.asynchronous_metrics `description` String ) ENGINE = SystemAsynchronousMetrics -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use.' CREATE TABLE system.build_options ( `name` String, `value` String ) ENGINE = SystemBuildOptions -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all build flags, compiler options and commit hash for used build.' CREATE TABLE system.clusters ( `cluster` String, @@ -55,14 +55,14 @@ CREATE TABLE system.clusters `name` String ALIAS cluster ) ENGINE = SystemClusters -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information about clusters defined in the configuration file or generated by a Replicated database.' CREATE TABLE system.collations ( `name` String, `language` Nullable(String) ) ENGINE = SystemTableCollations -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all available collations for alphabetical comparison of strings.' 
CREATE TABLE system.columns ( `database` String, @@ -88,13 +88,13 @@ CREATE TABLE system.columns `datetime_precision` Nullable(UInt64) ) ENGINE = SystemColumns -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Lists all columns from all tables of the current server.' CREATE TABLE system.contributors ( `name` String ) ENGINE = SystemContributors -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all ClickHouse contributors <3' CREATE TABLE system.current_roles ( `role_name` String, @@ -102,7 +102,7 @@ CREATE TABLE system.current_roles `is_default` UInt8 ) ENGINE = SystemCurrentRoles -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains active roles of a current user. SET ROLE changes the contents of this table.' CREATE TABLE system.data_skipping_indices ( `database` String, @@ -117,7 +117,7 @@ CREATE TABLE system.data_skipping_indices `marks` UInt64 ) ENGINE = SystemDataSkippingIndices -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains all the information about all the data skipping indices in tables, similar to system.columns.' CREATE TABLE system.data_type_families ( `name` String, @@ -125,7 +125,7 @@ CREATE TABLE system.data_type_families `alias_to` String ) ENGINE = SystemTableDataTypeFamilies -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all available native data types along with all the aliases used for compatibility with other DBMS.' CREATE TABLE system.databases ( `name` String, @@ -138,7 +138,7 @@ CREATE TABLE system.databases `database` String ALIAS name ) ENGINE = SystemDatabases -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Lists all databases of the current server.' CREATE TABLE system.detached_parts ( `database` String, @@ -155,7 +155,7 @@ CREATE TABLE system.detached_parts `level` Nullable(UInt32) ) ENGINE = SystemDetachedParts -COMMENT 'SYSTEM TABLE is built on the fly.' 
+COMMENT 'Contains a list of all parts which are being found in /detached directory along with a reason why it was detached. ClickHouse server doesn\'t use such parts anyhow.' CREATE TABLE system.dictionaries ( `database` String, @@ -185,7 +185,7 @@ CREATE TABLE system.dictionaries `comment` String ) ENGINE = SystemDictionaries -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information about dictionaries.' CREATE TABLE system.disks ( `name` String, @@ -205,7 +205,7 @@ CREATE TABLE system.disks `cache_path` String ) ENGINE = SystemDisks -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information about disks defined in the server configuration.' CREATE TABLE system.distributed_ddl_queue ( `entry` String, @@ -225,7 +225,7 @@ CREATE TABLE system.distributed_ddl_queue `query_duration_ms` Nullable(UInt64) ) ENGINE = SystemDDLWorkerQueue -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information about distributed DDL queries (ON CLUSTER clause) that were executed on a cluster.' CREATE TABLE system.distribution_queue ( `database` String, @@ -241,7 +241,7 @@ CREATE TABLE system.distribution_queue `last_exception_time` DateTime ) ENGINE = SystemDistributionQueue -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information about local files that are in the queue to be sent to the shards. These local files contain new parts that are created by inserting new data into the Distributed table in asynchronous mode.' CREATE TABLE system.enabled_roles ( `role_name` String, @@ -250,7 +250,7 @@ CREATE TABLE system.enabled_roles `is_default` UInt8 ) ENGINE = SystemEnabledRoles -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains all active roles at the moment, including current role of the current user and granted roles for current role.' CREATE TABLE system.errors ( `name` String, @@ -262,7 +262,7 @@ CREATE TABLE system.errors `remote` UInt8 ) ENGINE = SystemErrors -COMMENT 'SYSTEM TABLE is built on the fly.' 
+COMMENT 'Contains a list of all errors which have ever happened including the error code, last time and message with unsymbolized stacktrace.' CREATE TABLE system.events ( `event` String, @@ -271,7 +271,7 @@ CREATE TABLE system.events `name` String ALIAS event ) ENGINE = SystemEvents -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains profiling events and their current value.' CREATE TABLE system.formats ( `name` String, @@ -281,7 +281,7 @@ CREATE TABLE system.formats `supports_parallel_formatting` UInt8 ) ENGINE = SystemFormats -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all the formats along with flags whether a format is suitable for input/output or whether it supports parallelization.' CREATE TABLE system.functions ( `name` String, @@ -299,7 +299,7 @@ CREATE TABLE system.functions `categories` String ) ENGINE = SystemFunctions -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all available ordinary and aggregate functions with their descriptions.' CREATE TABLE system.graphite_retentions ( `config_name` String, @@ -314,7 +314,7 @@ CREATE TABLE system.graphite_retentions `Tables.table` Array(String) ) ENGINE = SystemGraphite -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information about parameters graphite_rollup which are used in tables with *GraphiteMergeTree engines.' CREATE TABLE system.licenses ( `library_name` String, @@ -323,14 +323,14 @@ CREATE TABLE system.licenses `license_text` String ) ENGINE = SystemLicenses -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains licenses of third-party libraries that are located in the contrib directory of ClickHouse sources.' CREATE TABLE system.macros ( `macro` String, `substitution` String ) ENGINE = SystemMacros -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all macros defined in server configuration.' 
CREATE TABLE system.merge_tree_settings ( `name` String, @@ -344,7 +344,7 @@ CREATE TABLE system.merge_tree_settings `is_obsolete` UInt8 ) ENGINE = SystemMergeTreeSettings -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all MergeTree engine specific settings, their current and default values along with descriptions. You may change any of them in SETTINGS section in CREATE query.' CREATE TABLE system.merges ( `database` String, @@ -373,7 +373,7 @@ CREATE TABLE system.merges `merge_algorithm` String ) ENGINE = SystemMerges -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of merges currently executing merges of MergeTree tables and their progress. Each merge operation is represented by a single row.' CREATE TABLE system.metrics ( `metric` String, @@ -382,7 +382,7 @@ CREATE TABLE system.metrics `name` String ALIAS metric ) ENGINE = SystemMetrics -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains metrics which can be calculated instantly, or have a current value. For example, the number of simultaneously processed queries or the current replica delay. This table is always up to date.' CREATE TABLE system.moves ( `database` String, @@ -395,7 +395,7 @@ CREATE TABLE system.moves `thread_id` UInt64 ) ENGINE = SystemMoves -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information about in-progress data part moves of MergeTree tables. Each data part movement is represented by a single row.' CREATE TABLE system.mutations ( `database` String, @@ -414,25 +414,25 @@ CREATE TABLE system.mutations `latest_fail_reason` String ) ENGINE = SystemMutations -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of mutations and their progress. Each mutation command is represented by a single row.' CREATE TABLE system.numbers ( `number` UInt64 ) ENGINE = SystemNumbers -COMMENT 'SYSTEM TABLE is built on the fly.' 
+COMMENT 'Generates all natural numbers, starting from 0 (to 2^64 - 1, and then again) in sorted order.' CREATE TABLE system.numbers_mt ( `number` UInt64 ) ENGINE = SystemNumbers -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Multithreaded version of `system.numbers`. Numbers order is not guaranteed.' CREATE TABLE system.one ( `dummy` UInt8 ) ENGINE = SystemOne -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'This table contains a single row with a single dummy UInt8 column containing the value 0. Used when the table is not specified explicitly, for example in queries like `SELECT 1`.' CREATE TABLE system.part_moves_between_shards ( `database` String, @@ -451,7 +451,7 @@ CREATE TABLE system.part_moves_between_shards `last_exception` String ) ENGINE = SystemShardMoves -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information about parts which are currently in a process of moving between shards and their progress.' CREATE TABLE system.parts ( `partition` String, @@ -522,7 +522,7 @@ CREATE TABLE system.parts `part_name` String ALIAS name ) ENGINE = SystemParts -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of currently existing (both active and inactive) parts of all *-MergeTree tables. Each part is represented by a single row.' CREATE TABLE system.parts_columns ( `partition` String, @@ -582,7 +582,7 @@ CREATE TABLE system.parts_columns `part_name` String ALIAS name ) ENGINE = SystemPartsColumns -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of columns of all currently existing parts of all MergeTree tables. Each column is represented by a single row.' CREATE TABLE system.processes ( `is_initial_query` UInt8, @@ -630,7 +630,7 @@ CREATE TABLE system.processes `Settings.Values` Array(String) ALIAS mapValues(Settings) ) ENGINE = SystemProcesses -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of currently executing processes (queries) with their progress.' 
CREATE TABLE system.projection_parts ( `partition` String, @@ -698,7 +698,7 @@ CREATE TABLE system.projection_parts `part_name` String ALIAS name ) ENGINE = SystemProjectionParts -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of currently existing projection parts (a copy of some part containing aggregated data or just sorted in different order) created for all the projections for all tables within a cluster.' CREATE TABLE system.projection_parts_columns ( `partition` String, @@ -754,7 +754,7 @@ CREATE TABLE system.projection_parts_columns `part_name` String ALIAS name ) ENGINE = SystemProjectionPartsColumns -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of columns of all currently existing projection parts of all MergeTree tables. Each column is represented by a single row.' CREATE TABLE system.quota_limits ( `quota_name` String, @@ -773,7 +773,7 @@ CREATE TABLE system.quota_limits `max_failed_sequential_authentications` Nullable(UInt64) ) ENGINE = SystemQuotaLimits -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information about maximums for all intervals of all quotas. Any number of rows or zero can correspond to specific quota.' CREATE TABLE system.quota_usage ( `quota_name` String, @@ -805,7 +805,7 @@ CREATE TABLE system.quota_usage `max_failed_sequential_authentications` Nullable(UInt64) ) ENGINE = SystemQuotaUsage -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains quota usage by the current user: how much is used and how much is left.' CREATE TABLE system.quotas ( `name` String, @@ -818,7 +818,7 @@ CREATE TABLE system.quotas `apply_to_except` Array(String) ) ENGINE = SystemQuotas -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information about quotas.' 
CREATE TABLE system.quotas_usage ( `quota_name` String, @@ -851,7 +851,7 @@ CREATE TABLE system.quotas_usage `max_failed_sequential_authentications` Nullable(UInt64) ) ENGINE = SystemQuotasUsage -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains quota usage by all users.' CREATE TABLE system.replicas ( `database` String, @@ -891,7 +891,7 @@ CREATE TABLE system.replicas `replica_is_active` Map(String, UInt8) ) ENGINE = SystemReplicas -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information and status of all table replicas on current server. Each replica is represented by a single row.' CREATE TABLE system.replicated_fetches ( `database` String, @@ -912,7 +912,7 @@ CREATE TABLE system.replicated_fetches `thread_id` UInt64 ) ENGINE = SystemReplicatedFetches -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information about currently running background fetches.' CREATE TABLE system.replicated_merge_tree_settings ( `name` String, @@ -926,7 +926,7 @@ CREATE TABLE system.replicated_merge_tree_settings `is_obsolete` UInt8 ) ENGINE = SystemReplicatedMergeTreeSettings -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all ReplicatedMergeTree engine specific settings, their current and default values along with descriptions. You may change any of them in SETTINGS section in CREATE query. ' CREATE TABLE system.replication_queue ( `database` String, @@ -952,7 +952,7 @@ CREATE TABLE system.replication_queue `merge_type` String ) ENGINE = SystemReplicationQueue -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information about tasks from replication queues stored in ClickHouse Keeper, or ZooKeeper, for each table replica.' CREATE TABLE system.role_grants ( `user_name` Nullable(String), @@ -963,7 +963,7 @@ CREATE TABLE system.role_grants `with_admin_option` UInt8 ) ENGINE = SystemRoleGrants -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains the role grants for users and roles. 
To add entries to this table, use `GRANT role TO user`. Using this table you may find out which roles are assigned to which users or which roles a user has.' CREATE TABLE system.roles ( `name` String, @@ -971,7 +971,7 @@ CREATE TABLE system.roles `storage` String ) ENGINE = SystemRoles -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all roles created at the server.' CREATE TABLE system.row_policies ( `name` String, @@ -987,7 +987,7 @@ CREATE TABLE system.row_policies `apply_to_except` Array(String) ) ENGINE = SystemRowPolicies -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains filters for one particular table, as well as a list of roles and/or users which should use this row policy.' CREATE TABLE system.settings ( `name` String, @@ -1003,7 +1003,7 @@ CREATE TABLE system.settings `is_obsolete` UInt8 ) ENGINE = SystemSettings -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all user-level settings (which can be modified in a scope of query or session), their current and default values along with descriptions.' CREATE TABLE system.settings_profile_elements ( `profile_name` Nullable(String), @@ -1018,7 +1018,7 @@ CREATE TABLE system.settings_profile_elements `inherit_profile` Nullable(String) ) ENGINE = SystemSettingsProfileElements -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Describes the content of each settings profile configured on the server. Including settings constraints, roles and users for which the settings are applied, and parent settings profiles.' CREATE TABLE system.settings_profiles ( `name` String, @@ -1030,7 +1030,7 @@ CREATE TABLE system.settings_profiles `apply_to_except` Array(String) ) ENGINE = SystemSettingsProfiles -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains properties of configured setting profiles.' 
CREATE TABLE system.stack_trace ( `thread_name` String, @@ -1039,7 +1039,7 @@ CREATE TABLE system.stack_trace `trace` Array(UInt64) ) ENGINE = SystemStackTrace -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Allows to obtain an unsymbolized stacktrace from all the threads of the server process.' CREATE TABLE system.storage_policies ( `policy_name` String, @@ -1054,7 +1054,7 @@ CREATE TABLE system.storage_policies `load_balancing` Enum8('ROUND_ROBIN' = 0, 'LEAST_USED' = 1) ) ENGINE = SystemStoragePolicies -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains information about storage policies and volumes defined in the server configuration.' CREATE TABLE system.table_engines ( `name` String, @@ -1068,7 +1068,7 @@ CREATE TABLE system.table_engines `supports_parallel_insert` UInt8 ) ENGINE = SystemTableEngines -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all available table engines along with information whether a particular table engine supports some specific features (e.g. settings, skipping indices, projections, replication, TTL, deduplication, parallel insert, etc.)' CREATE TABLE system.table_functions ( `name` String, @@ -1076,7 +1076,7 @@ CREATE TABLE system.table_functions `allow_readonly` UInt8 ) ENGINE = SystemTableFunctions -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all available table functions with their descriptions.' CREATE TABLE system.tables ( `database` String, @@ -1115,13 +1115,13 @@ CREATE TABLE system.tables `table` String ALIAS name ) ENGINE = SystemTables -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Lists all tables of the current server.' CREATE TABLE system.time_zones ( `time_zone` String ) ENGINE = SystemTimeZones -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of time zones that are supported by the ClickHouse server. This list of timezones might vary depending on the version of ClickHouse.' 
CREATE TABLE system.user_directories ( `name` String, @@ -1130,13 +1130,13 @@ CREATE TABLE system.user_directories `precedence` UInt64 ) ENGINE = SystemUserDirectories -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains the information about configured user directories - directories on the file system from which ClickHouse server is allowed to read user provided data.' CREATE TABLE system.users ( `name` String, `id` UUID, `storage` String, - `auth_type` Enum8('no_password' = 0, 'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6, 'bcrypt_password' = 7, 'ssh_key' = 8, 'http' = 9), + `auth_type` Enum8('no_password' = 0, 'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6, 'bcrypt_password' = 7, 'ssh_key' = 8, 'http' = 9, 'jwt' = 10), `auth_params` String, `host_ip` Array(String), `host_names` Array(String), @@ -1151,22 +1151,22 @@ CREATE TABLE system.users `default_database` String ) ENGINE = SystemUsers -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains a list of all users profiles either configured at the server through a configuration file or created via SQL.' CREATE TABLE system.warnings ( `message` String ) ENGINE = SystemWarnings -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Contains warnings about server configuration to be displayed by clickhouse-client right after it connects to the server.' CREATE TABLE system.zeros ( `zero` UInt8 ) ENGINE = SystemZeros -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Produces unlimited number of non-materialized zeros.' CREATE TABLE system.zeros_mt ( `zero` UInt8 ) ENGINE = SystemZeros -COMMENT 'SYSTEM TABLE is built on the fly.' +COMMENT 'Multithreaded version of system.zeros.' 
diff --git a/tests/queries/0_stateless/02118_show_create_table_rocksdb.reference b/tests/queries/0_stateless/02118_show_create_table_rocksdb.reference index 848abb332bb..e1e1ab3f441 100644 --- a/tests/queries/0_stateless/02118_show_create_table_rocksdb.reference +++ b/tests/queries/0_stateless/02118_show_create_table_rocksdb.reference @@ -1 +1 @@ -CREATE TABLE system.rocksdb\n(\n `database` String,\n `table` String,\n `name` String,\n `value` UInt64\n)\nENGINE = SystemRocksDB\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.rocksdb\n(\n `database` String,\n `table` String,\n `name` String,\n `value` UInt64\n)\nENGINE = SystemRocksDB\nCOMMENT \'Contains a list of metrics exposed from embedded RocksDB.\' diff --git a/tests/queries/0_stateless/02139_MV_with_scalar_subquery.sql b/tests/queries/0_stateless/02139_MV_with_scalar_subquery.sql index f0285bbec3d..63c894cfb85 100644 --- a/tests/queries/0_stateless/02139_MV_with_scalar_subquery.sql +++ b/tests/queries/0_stateless/02139_MV_with_scalar_subquery.sql @@ -16,7 +16,7 @@ SELECT FROM source_null GROUP BY count_subquery, min_subquery, max_subquery; - +SET optimize_trivial_insert_select = 1; INSERT INTO source SELECT number FROM numbers(2000) SETTINGS min_insert_block_size_rows=1500, max_insert_block_size=1500; SELECT count() FROM source; diff --git a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql index 0834b76d4ec..ae8c39b49bc 100644 --- a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql +++ b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.sql @@ -1,5 +1,6 @@ SET max_threads=0; SET optimize_read_in_order=1; +SET optimize_trivial_insert_select = 1; SET read_in_order_two_level_merge_threshold=100; DROP TABLE IF EXISTS t_read_in_order; diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference 
b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference deleted file mode 100644 index 1fc09c8d154..00000000000 --- a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference +++ /dev/null @@ -1,16 +0,0 @@ -Checking input_format_parallel_parsing=false& -1 -Checking input_format_parallel_parsing=false&cancel_http_readonly_queries_on_client_close=1&readonly=1 -1 -Checking input_format_parallel_parsing=false&send_progress_in_http_headers=true -1 -Checking input_format_parallel_parsing=false&cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true -1 -Checking input_format_parallel_parsing=true& -1 -Checking input_format_parallel_parsing=true&cancel_http_readonly_queries_on_client_close=1&readonly=1 -1 -Checking input_format_parallel_parsing=true&send_progress_in_http_headers=true -1 -Checking input_format_parallel_parsing=true&cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true -1 diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh deleted file mode 100755 index 5494f7d59cb..00000000000 --- a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-tsan, no-cpu-aarch64, no-parallel, no-debug -# TSan does not supports tracing. -# trace_log doesn't work on aarch64 - -# Regression for proper release of Context, -# via tracking memory of external tables. - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -tmp_file=$(mktemp "$CURDIR/clickhouse.XXXXXX.csv") -trap 'rm $tmp_file' EXIT - -$CLICKHOUSE_CLIENT -q "SELECT toString(number) FROM numbers(1e6) FORMAT TSV" > "$tmp_file" - -function run_and_check() -{ - local query_id - query_id="$(${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- <<<'SELECT generateUUIDv4()')" - - echo "Checking $*" - - # Run query with external table (implicit StorageMemory user) - $CLICKHOUSE_CURL -sS -F "s=@$tmp_file;" "$CLICKHOUSE_URL&s_structure=key+Int&query=SELECT+count()+FROM+s&memory_profiler_sample_probability=1&max_untracked_memory=0&query_id=$query_id&$*" -o /dev/null - - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- <<<'SYSTEM FLUSH LOGS' - - # Check that temporary table had been destroyed. - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&allow_introspection_functions=1" --data-binary @- <<<" - WITH arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS sym - SELECT 1 FROM system.trace_log - PREWHERE - query_id = '$query_id' AND - trace_type = 'MemorySample' AND - /* only deallocations */ - size < 0 AND - event_date >= yesterday() - WHERE - sym LIKE '%DB::StorageMemory::drop%\n%TemporaryTableHolder::~TemporaryTableHolder%' - LIMIT 1 - " -} - -for input_format_parallel_parsing in false true; do - query_args_variants=( - "" - "cancel_http_readonly_queries_on_client_close=1&readonly=1" - "send_progress_in_http_headers=true" - # nested progress callback - "cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true" - ) - for query_args in "${query_args_variants[@]}"; do - run_and_check "input_format_parallel_parsing=$input_format_parallel_parsing&$query_args" - done -done diff --git a/tests/queries/0_stateless/02154_parser_backtracking.sh b/tests/queries/0_stateless/02154_parser_backtracking.sh index fd227bcfc56..72121d14dfa 100755 --- a/tests/queries/0_stateless/02154_parser_backtracking.sh +++ 
b/tests/queries/0_stateless/02154_parser_backtracking.sh @@ -1,4 +1,6 @@ #!/usr/bin/env bash +# Tags: no-tsan +# ^ TSan uses more stack CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02155_dictionary_comment.sql b/tests/queries/0_stateless/02155_dictionary_comment.sql index 30b85e16a7c..8ebc7b259fc 100644 --- a/tests/queries/0_stateless/02155_dictionary_comment.sql +++ b/tests/queries/0_stateless/02155_dictionary_comment.sql @@ -49,5 +49,5 @@ SELECT name, comment FROM system.tables WHERE name == '02155_test_dictionary_vie SELECT name, comment FROM system.tables WHERE name == '02155_test_dictionary_view' AND database == currentDatabase(); DROP TABLE 02155_test_dictionary_view; -DROP TABLE 02155_test_table; DROP DICTIONARY 02155_test_dictionary; +DROP TABLE 02155_test_table; diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference index 86a36a9392c..876cee60baa 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference @@ -1,6 +1,9 @@ - Prewhere info - Prewhere filter - Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) Prewhere info Prewhere filter Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) @@ -8,8 +11,15 @@ Prewhere filter Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) 2 - Filter column: and(equals(k, 3), notEmpty(v)) (removed) + Filter column: and(equals(k, 3), notEmpty(v)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) 2 - Filter column: and(equals(k, 3), notEmpty(v)) (removed) - Filter 
column: and(equals(k, 3), notEmpty(v)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) 2 diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql index ca61a8f2d57..4f010ebadfd 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql @@ -24,7 +24,8 @@ INSERT INTO t_02156_mt1 SELECT number, toString(number) FROM numbers(10000); INSERT INTO t_02156_mt2 SELECT number, toString(number) FROM numbers(10000); INSERT INTO t_02156_log SELECT number, toString(number) FROM numbers(10000); -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%' settings allow_experimental_analyzer=1; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%' settings allow_experimental_analyzer=0; SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v); SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge2 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere_2.reference b/tests/queries/0_stateless/02156_storage_merge_prewhere_2.reference new file mode 100644 index 
00000000000..8e759648871 --- /dev/null +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere_2.reference @@ -0,0 +1,12 @@ +1 a +1 a +2 b +2 b +1 a +1 a +2 b +2 b +1 a +2 b +1 a +2 b diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere_2.sql b/tests/queries/0_stateless/02156_storage_merge_prewhere_2.sql new file mode 100644 index 00000000000..1b4881d4e7b --- /dev/null +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere_2.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS t_02156_ololo_1; +DROP TABLE IF EXISTS t_02156_ololo_2; +DROP TABLE IF EXISTS t_02156_ololo_dist; + +CREATE TABLE t_02156_ololo_1 (k UInt32, v Nullable(String)) ENGINE = MergeTree order by k; +CREATE TABLE t_02156_ololo_2 (k UInt32, v String) ENGINE = MergeTree order by k; +CREATE TABLE t_02156_ololo_dist (k UInt32, v String) ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_02156_ololo_2); +CREATE TABLE t_02156_ololo_dist2 (k UInt32, v Nullable(String)) ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_02156_ololo_1); + +insert into t_02156_ololo_1 values (1, 'a'); +insert into t_02156_ololo_2 values (2, 'b'); + +select * from merge('t_02156_ololo') where k != 0 and notEmpty(v) order by k settings optimize_move_to_prewhere=0; +select * from merge('t_02156_ololo') where k != 0 and notEmpty(v) order by k settings optimize_move_to_prewhere=1; + +select * from merge('t_02156_ololo_dist') where k != 0 and notEmpty(v) order by k settings optimize_move_to_prewhere=0; +select * from merge('t_02156_ololo_dist') where k != 0 and notEmpty(v) order by k settings optimize_move_to_prewhere=1; diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere_not_ready_set_bug.reference b/tests/queries/0_stateless/02156_storage_merge_prewhere_not_ready_set_bug.reference new file mode 100644 index 00000000000..20c58c33770 --- /dev/null +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere_not_ready_set_bug.reference @@ -0,0 +1 @@ +59900 1000 1396 diff 
--git a/tests/queries/0_stateless/02156_storage_merge_prewhere_not_ready_set_bug.sql b/tests/queries/0_stateless/02156_storage_merge_prewhere_not_ready_set_bug.sql new file mode 100644 index 00000000000..fc18c97cb6e --- /dev/null +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere_not_ready_set_bug.sql @@ -0,0 +1,7 @@ +create table merge_kek_1 (x UInt32, y UInt32) engine = MergeTree order by x; +create table merge_kek_2 (x UInt32, y UInt32) engine = MergeTree order by x; + +insert into merge_kek_1 select number, number from numbers(100); +insert into merge_kek_2 select number + 500, number + 500 from numbers(1e6); + +select sum(x), min(x + x), max(x + x) from merge(currentDatabase(), '^merge_kek_.$') where x > 200 and y in (select 500 + number * 2 from numbers(100)) settings max_threads=2; diff --git a/tests/queries/0_stateless/02183_dictionary_date_types.sql b/tests/queries/0_stateless/02183_dictionary_date_types.sql index e06863d5e53..5671f47cdab 100644 --- a/tests/queries/0_stateless/02183_dictionary_date_types.sql +++ b/tests/queries/0_stateless/02183_dictionary_date_types.sql @@ -170,8 +170,8 @@ LIFETIME(0); SELECT 'Polygon dictionary'; SELECT * FROM 02183_polygon_dictionary; -DROP TABLE 02183_polygon_dictionary_source_table; DROP DICTIONARY 02183_polygon_dictionary; +DROP TABLE 02183_polygon_dictionary_source_table; DROP TABLE IF EXISTS 02183_range_dictionary_source_table; CREATE TABLE 02183_range_dictionary_source_table diff --git a/tests/queries/0_stateless/02185_range_hashed_dictionary_open_ranges.sql b/tests/queries/0_stateless/02185_range_hashed_dictionary_open_ranges.sql index e6edee2ea18..a36c72de0ac 100644 --- a/tests/queries/0_stateless/02185_range_hashed_dictionary_open_ranges.sql +++ b/tests/queries/0_stateless/02185_range_hashed_dictionary_open_ranges.sql @@ -60,4 +60,5 @@ SELECT dictHas('02185_range_dictionary', 0, 0); SELECT dictHas('02185_range_dictionary', 0, 5001); SELECT dictHas('02185_range_dictionary', 0, 10001); +DROP DICTIONARY 
02185_range_dictionary; DROP TABLE 02185_range_dictionary_source_table; diff --git a/tests/queries/0_stateless/02210_processors_profile_log.reference b/tests/queries/0_stateless/02210_processors_profile_log.reference index 41543d0706a..035bd9897ad 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.reference +++ b/tests/queries/0_stateless/02210_processors_profile_log.reference @@ -1,38 +1,8 @@ --- { echo } -EXPLAIN PIPELINE SELECT sleep(1); (Expression) ExpressionTransform (ReadFromStorage) SourceFromSingleChunk 0 → 1 -SELECT sleep(1) SETTINGS log_processors_profiles=true, log_queries=1, log_queries_min_type='QUERY_FINISH'; 0 -SYSTEM FLUSH LOGS; -WITH - ( - SELECT query_id - FROM system.query_log - WHERE current_database = currentDatabase() AND Settings['log_processors_profiles']='1' - ) AS query_id_ -SELECT - name, - multiIf( - -- ExpressionTransform executes sleep(), - -- so IProcessor::work() will spend 1 sec. - name = 'ExpressionTransform', elapsed_us>=1e6, - -- SourceFromSingleChunk, that feed data to ExpressionTransform, - -- will feed first block and then wait in PortFull. - name = 'SourceFromSingleChunk', output_wait_elapsed_us>=1e6, - -- NullSource/LazyOutputFormatLazyOutputFormat are the outputs - -- so they cannot starts to execute before sleep(1) will be executed. 
- input_wait_elapsed_us>=1e6) - elapsed, - input_rows, - input_bytes, - output_rows, - output_bytes -FROM system.processors_profile_log -WHERE query_id = query_id_ -ORDER BY name; ExpressionTransform 1 1 1 1 1 LazyOutputFormat 1 1 1 0 0 LimitsCheckingTransform 1 1 1 1 1 diff --git a/tests/queries/0_stateless/02210_processors_profile_log.sql b/tests/queries/0_stateless/02210_processors_profile_log.sql index a15ed26fd67..59edbb71457 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.sql +++ b/tests/queries/0_stateless/02210_processors_profile_log.sql @@ -1,4 +1,3 @@ --- { echo } EXPLAIN PIPELINE SELECT sleep(1); SELECT sleep(1) SETTINGS log_processors_profiles=true, log_queries=1, log_queries_min_type='QUERY_FINISH'; @@ -15,13 +14,13 @@ SELECT multiIf( -- ExpressionTransform executes sleep(), -- so IProcessor::work() will spend 1 sec. - name = 'ExpressionTransform', elapsed_us>=1e6, + name = 'ExpressionTransform', elapsed_us>=1e6 ? 1 : elapsed_us, -- SourceFromSingleChunk, that feed data to ExpressionTransform, -- will feed first block and then wait in PortFull. - name = 'SourceFromSingleChunk', output_wait_elapsed_us>=1e6, + name = 'SourceFromSingleChunk', output_wait_elapsed_us>=1e6 ? 1 : output_wait_elapsed_us, -- NullSource/LazyOutputFormatLazyOutputFormat are the outputs -- so they cannot starts to execute before sleep(1) will be executed. - input_wait_elapsed_us>=1e6) + input_wait_elapsed_us>=1e6 ? 
1 : input_wait_elapsed_us) elapsed, input_rows, input_bytes, diff --git a/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql b/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql index dbe18953957..f1f7c876ba6 100644 --- a/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql +++ b/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql @@ -28,6 +28,7 @@ create materialized view mv_02231 to buffer_02231 as select from in_02231 group by key; +set optimize_trivial_insert_select = 1; insert into in_02231 select * from numbers(10e6) settings max_memory_usage='310Mi', max_threads=1; drop table buffer_02231; diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql index 5af6565c03d..b23d2d25111 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql @@ -21,6 +21,7 @@ CREATE TABLE t_random_1 ) ENGINE = GenerateRandom(1, 5, 3); +SET optimize_trivial_insert_select = 1; INSERT INTO t_1 select rowNumberInAllBlocks(), *, '1984-01-01' from t_random_1 limit 1000000; OPTIMIZE TABLE t_1 FINAL; diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference index 93b6d4de94f..6b5dd182112 100644 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference @@ -34,3 +34,21 @@ DOWNLOADED 0 79 80 DOWNLOADED 0 745 746 2 Expect no cache +Using storage policy: azure_cache +0 +Expect cache +DOWNLOADED 0 0 1 +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +3 +Expect cache +DOWNLOADED 0 0 1 +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +3 +Expect no cache +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect no cache diff --git 
a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh index 9aa631c5d0a..57b8cec7864 100755 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference index 186dcc1eeb2..f53f00992e7 100644 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference @@ -218,3 +218,113 @@ SELECT count() FROM test_02241 5010500 SELECT count() FROM test_02241 WHERE value LIKE '%010%' 18816 +Using storage policy: azure_cache +DROP TABLE IF EXISTS test_02241 +CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='azure_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization = 1 +SYSTEM STOP MERGES test_02241 +SYSTEM DROP FILESYSTEM CACHE +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = 
caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path +0 +SELECT count(), sum(size) FROM system.filesystem_cache +0 0 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical +Row 1: +────── +file_segment_range_begin: 0 +file_segment_range_end: 745 +size: 746 +state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path +8 +SELECT count(), sum(size) FROM system.filesystem_cache +8 1100 +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 +0 +SELECT * FROM test_02241 FORMAT Null +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 +2 +SELECT * FROM test_02241 FORMAT Null +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 +2 +SELECT count(), sum(size) size FROM system.filesystem_cache +8 1100 +SYSTEM DROP FILESYSTEM CACHE +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100, 200) +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, 
remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical; +Row 1: +────── +file_segment_range_begin: 0 +file_segment_range_end: 1659 +size: 1660 +state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path +8 +SELECT count(), sum(size) FROM system.filesystem_cache +8 2014 +SELECT count(), sum(size) FROM system.filesystem_cache +8 2014 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0 +SELECT count(), sum(size) FROM system.filesystem_cache +8 2014 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000) +SELECT count(), sum(size) FROM system.filesystem_cache +24 84045 +SYSTEM START MERGES test_02241 +OPTIMIZE TABLE test_02241 FINAL +SELECT count(), sum(size) FROM system.filesystem_cache +32 167243 +ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100 +SELECT count(), sum(size) FROM system.filesystem_cache +41 250541 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) +SYSTEM FLUSH LOGS +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) 0 +SELECT count() FROM test_02241 +5010500 +SELECT count() FROM test_02241 WHERE value LIKE '%010%' +18816 diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh index c1d930f54a7..1028fba76f5 100755 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh @@ 
-7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" $CLICKHOUSE_CLIENT --echo --query "DROP TABLE IF EXISTS test_02241" diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference index 99f31df7def..447e1a275fc 100644 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference @@ -6,3 +6,7 @@ Using storage policy: local_cache (0,519) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE (0,808110) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE (0,808110) READ_FROM_CACHE +Using storage policy: azure_cache +(0,519) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE +(0,808110) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE +(0,808110) READ_FROM_CACHE diff --git a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh index 4c92d1d2954..7a665d81eab 100755 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" diff --git a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.reference b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.reference index 8ad0a566c62..1c60e40942c 100644 --- a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.reference +++ b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.reference @@ -97,8 +97,8 @@ c1 Array(Nullable(Bool)) [] [NULL] [false] -c1 Tuple(Nullable(Int64), Nullable(Int64), Nullable(Int64)) -(1,2,3) +c1 Nullable(String) +(1, 2, 3) c1 Nullable(String) 123.123 c1 Array(Tuple(Nullable(Int64), Nullable(Int64), Nullable(Int64))) diff --git a/tests/queries/0_stateless/02265_column_ttl.sql b/tests/queries/0_stateless/02265_column_ttl.sql index 16ae2da2a2b..ac64dd9457a 100644 --- a/tests/queries/0_stateless/02265_column_ttl.sql +++ b/tests/queries/0_stateless/02265_column_ttl.sql @@ -16,7 +16,8 @@ insert into ttl_02265 values ('2010-01-01', 2010, 'foo'); optimize table ttl_02265 final; -- after, 20100101_0_0_2 will not have ttl.txt, but will have value.bin optimize table ttl_02265 final; -system sync replica ttl_02265; +system sync replica ttl_02265 STRICT; +system sync replica ttl_02265_r2 STRICT; -- after detach/attach it will not have TTL in-memory, and will not have ttl.txt detach table ttl_02265; diff --git a/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql b/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql index b2a04788bbb..f8faa3e653b 100644 --- a/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql +++ b/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql @@ -68,4 +68,10 @@ with last_month as ( select 
repo_name, count() as count_last_month, rowNumberInAllBlocks() + 1 as position_last_month from github_events where repo_name in (select repo_name from top_repos) and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count_last_month desc ) select d.repo_name, columns('count') from last_day d join last_week w on d.repo_name = w.repo_name join last_month m on d.repo_name = m.repo_name; +set allow_suspicious_low_cardinality_types=1; + +CREATE TABLE github_events__fuzz_0 (`file_time` Int64, `event_type` Enum8('CommitCommentEvent' = 1, 'CreateEvent' = 2, 'DeleteEvent' = 3, 'ForkEvent' = 4, 'GollumEvent' = 5, 'IssueCommentEvent' = 6, 'IssuesEvent' = 7, 'MemberEvent' = 8, 'PublicEvent' = 9, 'PullRequestEvent' = 10, 'PullRequestReviewCommentEvent' = 11, 'PushEvent' = 12, 'ReleaseEvent' = 13, 'SponsorshipEvent' = 14, 'WatchEvent' = 15, 'GistEvent' = 16, 'FollowEvent' = 17, 'DownloadEvent' = 18, 'PullRequestReviewEvent' = 19, 'ForkApplyEvent' = 20, 'Event' = 21, 'TeamAddEvent' = 22), `actor_login` LowCardinality(String), `repo_name` LowCardinality(Nullable(String)), `created_at` DateTime, `updated_at` DateTime, `action` Array(Enum8('none' = 0, 'created' = 1, 'added' = 2, 'edited' = 3, 'deleted' = 4, 'opened' = 5, 'closed' = 6, 'reopened' = 7, 'assigned' = 8, 'unassigned' = 9, 'labeled' = 10, 'unlabeled' = 11, 'review_requested' = 12, 'review_request_removed' = 13, 'synchronize' = 14, 'started' = 15, 'published' = 16, 'update' = 17, 'create' = 18, 'fork' = 19, 'merged' = 20)), `comment_id` UInt64, `body` String, `path` LowCardinality(String), `position` Int32, `line` Int32, `ref` String, `ref_type` Enum8('none' = 0, 'branch' = 1, 'tag' = 2, 'repository' = 3, 'unknown' = 4), `creator_user_login` Int16, `number` UInt32, `title` String, `labels` Array(Array(LowCardinality(String))), `state` Enum8('none' = 0, 'open' = 1, 'closed' = 2), `locked` UInt8, `assignee` Array(LowCardinality(String)), `assignees` 
Array(LowCardinality(String)), `comments` UInt32, `author_association` Array(Enum8('NONE' = 0, 'CONTRIBUTOR' = 1, 'OWNER' = 2, 'COLLABORATOR' = 3, 'MEMBER' = 4, 'MANNEQUIN' = 5)), `closed_at` UUID, `merged_at` DateTime, `merge_commit_sha` Nullable(String), `requested_reviewers` Array(LowCardinality(Int64)), `requested_teams` Array(String), `head_ref` String, `head_sha` String, `base_ref` String, `base_sha` String, `merged` Nullable(UInt8), `mergeable` Nullable(UInt8), `rebaseable` LowCardinality(UInt8), `mergeable_state` Array(Enum8('unknown' = 0, 'dirty' = 1, 'clean' = 2, 'unstable' = 3, 'draft' = 4)), `merged_by` LowCardinality(String), `review_comments` UInt32, `maintainer_can_modify` Nullable(UInt8), `commits` UInt32, `additions` Nullable(UInt32), `deletions` UInt32, `changed_files` UInt32, `diff_hunk` Nullable(String), `original_position` UInt32, `commit_id` String, `original_commit_id` String, `push_size` UInt32, `push_distinct_size` UInt32, `member_login` LowCardinality(String), `release_tag_name` LowCardinality(String), `release_name` String, `review_state` Int16) ENGINE = MergeTree ORDER BY (event_type, repo_name, created_at) settings allow_nullable_key=1; + +EXPLAIN PIPELINE header = true, compact = true WITH top_repos AS (SELECT repo_name FROM github_events__fuzz_0 WHERE (event_type = 'WatchEvent') AND (toDate(created_at) = (today() - 1)) GROUP BY repo_name ORDER BY count() DESC LIMIT 100 UNION DISTINCT SELECT repo_name FROM github_events__fuzz_0 WHERE (event_type = 'WatchEvent') AND (toMonday(created_at) = toMonday(today() - toIntervalWeek(1))) GROUP BY repo_name ORDER BY count() DESC LIMIT 100 UNION DISTINCT SELECT repo_name FROM github_events__fuzz_0 PREWHERE (event_type = 'WatchEvent') AND (toStartOfMonth(created_at) = (toStartOfMonth(today()) - toIntervalMonth(1))) GROUP BY repo_name ORDER BY count() DESC LIMIT 100 UNION DISTINCT SELECT repo_name FROM github_events WHERE (event_type = 'WatchEvent') AND (toYear(created_at) = (toYear(today()) - 1)) 
GROUP BY repo_name ORDER BY count() DESC LIMIT 100), last_day AS (SELECT repo_name, count() AS count_last_day, rowNumberInAllBlocks() + 1 AS position_last_day FROM github_events WHERE (repo_name IN (SELECT repo_name FROM top_repos)) AND (toDate(created_at) = (today() - 1)) GROUP BY repo_name ORDER BY count_last_day DESC), last_week AS (SELECT repo_name, count() AS count_last_week, rowNumberInAllBlocks() + 1 AS position_last_week FROM github_events WHERE (repo_name IN (SELECT repo_name FROM top_repos)) AND (toMonday(created_at) = (toMonday(today()) - toIntervalWeek(2))) GROUP BY repo_name ORDER BY count_last_week DESC), last_month AS (SELECT repo_name, count() AS count_last_month, rowNumberInAllBlocks() + 1 AS position_last_month FROM github_events__fuzz_0 WHERE ('deleted' = 4) AND in(repo_name) AND (toStartOfMonth(created_at) = (toStartOfMonth(today()) - toIntervalMonth(1))) GROUP BY repo_name ORDER BY count_last_month DESC) SELECT d.repo_name, COLUMNS(count) FROM last_day AS d INNER JOIN last_week AS w ON d.repo_name = w.repo_name INNER JOIN last_month AS m ON d.repo_name = m.repo_name format Null; -- { serverError INVALID_SETTING_VALUE } + DROP TABLE github_events; diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference index b4e5b6715de..e3875dbabe1 100644 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference @@ -16,3 +16,12 @@ Using storage policy: local_cache 1 1 0 +Using storage policy: azure_cache +0 +2 +0 +1 +1 +1 +1 +0 diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index 1e1841862e9..a2c9352b7aa 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # 
shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_02286" diff --git a/tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.sql b/tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.sql index 3825df1e557..a2c2fc7cba2 100644 --- a/tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.sql +++ b/tests/queries/0_stateless/02302_column_decl_null_before_defaul_value.sql @@ -56,6 +56,6 @@ ALTER TABLE null_before ALTER COLUMN id TYPE INT NULL; -- { clientError SYNTAX_E select 'modify column, NULL modifier is not allowed'; DROP TABLE IF EXISTS null_before SYNC; CREATE TABLE null_before (id INT NOT NULL) ENGINE=MergeTree() ORDER BY tuple(); -ALTER TABLE null_before MODIFY COLUMN id NULL DEFAULT 1; -- { serverError UNKNOWN_TYPE } +ALTER TABLE null_before MODIFY COLUMN id NULL DEFAULT 1; -- { clientError SYNTAX_ERROR } DROP TABLE IF EXISTS null_before SYNC; diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference b/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference index 062aac259a4..0a9e1c20b59 100644 --- a/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.reference @@ -1,3 +1,4 @@ Using storage policy: s3_cache Using storage policy: local_cache Using storage policy: s3_cache_multi +Using storage policy: azure_cache diff --git a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh index f5de4346fd6..fbaec1ffaa7 100755 --- a/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh +++ b/tests/queries/0_stateless/02313_filesystem_cache_seeks.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CUR_DIR"/../shell_config.sh -for STORAGE_POLICY in 's3_cache' 'local_cache' 's3_cache_multi'; do +for STORAGE_POLICY in 's3_cache' 'local_cache' 's3_cache_multi' 'azure_cache'; do echo "Using storage policy: $STORAGE_POLICY" $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference index 85e8a802bdc..9b9885478cd 100644 --- a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference @@ -87,7 +87,7 @@ Sorting (Stream): a ASC, b ASC Sorting (Stream): __table1.a ASC, __table1.b ASC Sorting (Stream): __table1.a ASC, __table1.b ASC Sorting (Stream): __table1.a ASC, __table1.b ASC -Sorting (Stream): __table1.a ASC, b ASC +Sorting (Stream): a ASC, b ASC -- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns Sorting (Stream): __table1.a ASC Sorting (Stream): __table1.a ASC diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index caa600298ce..bd018018789 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -16,7 +16,7 @@ function check_refcnt_for_table() -- queue may hold the parts lock for awhile as well system stop pulling replication log $table; " - $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into $table select number, number%4 from numbers(200)" + $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into $table select number, number%4 from numbers(2000)" local query_id query_id="$table-$(random_str 10)" @@ -24,6 +24,7 @@ function check_refcnt_for_table() local log_file log_file=$(mktemp "$CUR_DIR/clickhouse-tests.XXXXXX.log") local args=( 
+ --allow_repeated_settings --format Null --max_threads 1 --max_block_size 1 @@ -34,7 +35,7 @@ function check_refcnt_for_table() ) # Notes: - # - query may sleep 0.1*(200/4)=5 seconds maximum, it is enough to check system.parts + # - query may sleep 0.1*(2000/4)=50 seconds maximum, it is enough to check system.parts # - "part = 1" condition should prune all parts except first # - max_block_size=1 with index_granularity=1 will allow to cancel the query earlier $CLICKHOUSE_CLIENT "${args[@]}" -q "select sleepEachRow(0.1) from $table where part = 1" & diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index db8182e30bb..6895606eb2b 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1,2 +1,2 @@ 1 -102400 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/02344_describe_cache_test 5 5000 0 16 +102400 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/02344_describe_cache_test 0 5000 0 16 diff --git a/tests/queries/0_stateless/02346_fulltext_index_match_predicate.reference b/tests/queries/0_stateless/02346_fulltext_index_match_predicate.reference index 84fc422379c..e890eac1794 100644 --- a/tests/queries/0_stateless/02346_fulltext_index_match_predicate.reference +++ b/tests/queries/0_stateless/02346_fulltext_index_match_predicate.reference @@ -1,19 +1,19 @@ -1 Hello ClickHouse -2 Hello World +1 Well, Hello ClickHouse ! +2 Well, Hello World ! Granules: 6/6 Granules: 2/6 Granules: 6/6 Granules: 2/6 --- -1 Hello ClickHouse -2 Hello World -6 World Champion +1 Well, Hello ClickHouse ! +2 Well, Hello World ! 
+6 True World Champion Granules: 6/6 Granules: 3/6 Granules: 6/6 Granules: 3/6 --- -5 OLAP Database +5 Its An OLAP Database Granules: 6/6 Granules: 1/6 Granules: 6/6 diff --git a/tests/queries/0_stateless/02346_fulltext_index_match_predicate.sql b/tests/queries/0_stateless/02346_fulltext_index_match_predicate.sql index 927e605c20a..7f36c423a41 100644 --- a/tests/queries/0_stateless/02346_fulltext_index_match_predicate.sql +++ b/tests/queries/0_stateless/02346_fulltext_index_match_predicate.sql @@ -14,19 +14,19 @@ ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 1; -INSERT INTO tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Good Weather'), (4, 'Say Hello'), (5, 'OLAP Database'), (6, 'World Champion'); +INSERT INTO tab VALUES (1, 'Well, Hello ClickHouse !'), (2, 'Well, Hello World !'), (3, 'Good Weather !'), (4, 'Say Hello !'), (5, 'Its An OLAP Database'), (6, 'True World Champion'); -SELECT * FROM tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id; +SELECT * FROM tab WHERE match(str, ' Hello (ClickHouse|World) ') ORDER BY id; -- Read 2/6 granules --- Required string: 'Hello ' --- Alternatives: 'Hello ClickHouse', 'Hello World' +-- Required string: ' Hello ' +-- Alternatives: ' Hello ClickHouse ', ' Hello World ' SELECT * FROM ( EXPLAIN PLAN indexes=1 - SELECT * FROM tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id + SELECT * FROM tab WHERE match(str, ' Hello (ClickHouse|World) ') ORDER BY id ) WHERE explain LIKE '%Granules: %' @@ -37,7 +37,7 @@ SELECT * FROM ( EXPLAIN PLAN indexes=1 - SELECT * FROM tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id + SELECT * FROM tab WHERE match(str, ' Hello (ClickHouse|World) ') ORDER BY id ) WHERE explain LIKE '%Granules: %' @@ -46,17 +46,17 @@ SETTINGS SELECT '---'; -SELECT * FROM tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id; +SELECT * FROM tab WHERE match(str, '.* (ClickHouse|World) ') ORDER BY id; -- Read 3/6 granules -- Required string: - --- Alternatives: 
'ClickHouse', 'World' +-- Alternatives: ' ClickHouse ', ' World ' SELECT * FROM ( EXPLAIN PLAN indexes = 1 - SELECT * FROM tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id + SELECT * FROM tab WHERE match(str, '.* (ClickHouse|World) ') ORDER BY id ) WHERE explain LIKE '%Granules: %' @@ -67,7 +67,7 @@ SELECT * FROM ( EXPLAIN PLAN indexes = 1 - SELECT * FROM tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id + SELECT * FROM tab WHERE match(str, '.* (ClickHouse|World) ') ORDER BY id ) WHERE explain LIKE '%Granules: %' @@ -76,17 +76,17 @@ SETTINGS SELECT '---'; -SELECT * FROM tab WHERE match(str, 'OLAP.*') ORDER BY id; +SELECT * FROM tab WHERE match(str, ' OLAP .*') ORDER BY id; -- Read 1/6 granules --- Required string: 'OLAP' +-- Required string: ' OLAP ' -- Alternatives: - SELECT * FROM ( EXPLAIN PLAN indexes = 1 - SELECT * FROM tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id + SELECT * FROM tab WHERE match(str, ' OLAP (.*?)*') ORDER BY id ) WHERE explain LIKE '%Granules: %' @@ -97,7 +97,7 @@ SELECT * FROM ( EXPLAIN PLAN indexes = 1 - SELECT * FROM tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id + SELECT * FROM tab WHERE match(str, ' OLAP (.*?)*') ORDER BY id ) WHERE explain LIKE '%Granules: %' diff --git a/tests/queries/0_stateless/02346_fulltext_index_old_name.sql b/tests/queries/0_stateless/02346_fulltext_index_old_name.sql index bc641caf237..4e52e689211 100644 --- a/tests/queries/0_stateless/02346_fulltext_index_old_name.sql +++ b/tests/queries/0_stateless/02346_fulltext_index_old_name.sql @@ -1,22 +1,16 @@ +-- Index type 'inverted' was renamed to 'full_text' in April 2024. +-- Such indexes are experimental. Test what happens when ClickHouse encounters tables with the old index type. + DROP TABLE IF EXISTS tab; --- Index type 'inverted' was renamed to 'full_text' in April 2024. --- Such indexes are experimental. Nevertheless test what happens when ClickHouse encounters tables with the old index type. 
+-- It must be possible to load old tables with 'inverted'-type indexes +-- In stateless tests, we cannot use old persistences. Emulate "loading an old index" by creating it (internally, similar code executes). --- Create a full text index with the old type --- This was how it was done in the old days. These days this throws an exception. -SET allow_experimental_inverted_index = 1; -CREATE TABLE tab(k UInt64, s String, INDEX idx(s) TYPE inverted(2)) ENGINE = MergeTree() ORDER BY k; -- { serverError ILLEGAL_INDEX }; - --- There are unfortunately side effects of this behavior. In particular, if ClickHouse's automatic table load during --- startup finds a table with 'inverted'-type indexes created by an older version, it immediately halts as it thinks --- the persistence is corrupt. Similarly (but less severely), tables with 'inverted' index cannot be attached. --- A backdoor avoids this. Just set allow_experimental_inverted_index = 0 (which is the default). --- --- Note that the backdoor will exist only temporarily during a transition period. It will be removed in future. Its only purpose is --- to simplify the migrationn of experimental inverted indexes to experimental full-text indexes instead of simply breaking existing --- tables. +-- Creation only works with the (old) setting enabled. SET allow_experimental_inverted_index = 0; +CREATE TABLE tab(k UInt64, s String, INDEX idx(s) TYPE inverted(2)) ENGINE = MergeTree() ORDER BY k; -- { serverError ILLEGAL_INDEX } + +SET allow_experimental_inverted_index = 1; CREATE TABLE tab(k UInt64, s String, INDEX idx(s) TYPE inverted(2)) ENGINE = MergeTree() ORDER BY k; INSERT INTO tab VALUES (1, 'ab') (2, 'bc'); @@ -24,14 +18,12 @@ INSERT INTO tab VALUES (1, 'ab') (2, 'bc'); DETACH TABLE tab; ATTACH TABLE tab; --- No, the backdoor does not make 'inverted' indexes non-experimental. 
--- On the one hand, the backdoor is undocumented, on the other hand, SELECTs that use such indexes now throw an exception, --- making 'inverted' indexes useless. +-- To encourage users to migrate to the new index type, we now throw an exception when the index is used by queries. SELECT * from tab WHERE s = 'bc'; -- { serverError ILLEGAL_INDEX } -- The exception recommends to drop the index and create a 'full_text' index instead. Let's try. ALTER TABLE tab DROP INDEX idx; -SET allow_experimental_full_text_index = 1; -- note that this is a different setting +SET allow_experimental_full_text_index = 1; -- the new setting ALTER TABLE tab ADD INDEX idx(s) TYPE full_text(2); SELECT * from tab WHERE s = 'bc'; diff --git a/tests/queries/0_stateless/02346_fulltext_index_search.reference b/tests/queries/0_stateless/02346_fulltext_index_search.reference index d742bbc77ec..d7c89d434e7 100644 --- a/tests/queries/0_stateless/02346_fulltext_index_search.reference +++ b/tests/queries/0_stateless/02346_fulltext_index_search.reference @@ -13,19 +13,19 @@ af full_text 1 Test full_text() af full_text -101 Alick a01 -106 Alick a06 -111 Alick b01 -116 Alick b06 -101 Alick a01 -106 Alick a06 +101 x Alick a01 y +106 x Alick a06 y +111 x Alick b01 y +116 x Alick b06 y +101 x Alick a01 y +106 x Alick a06 y 1 -101 Alick a01 -111 Alick b01 +101 x Alick a01 y +111 x Alick b01 y 1 Test on array columns af full_text -3 ['Click a03','Click b03'] +3 ['x Click a03 y','x Click b03 y'] 1 Test on map columns af full_text diff --git a/tests/queries/0_stateless/02346_fulltext_index_search.sql b/tests/queries/0_stateless/02346_fulltext_index_search.sql index 6b06bde6598..80f49790201 100644 --- a/tests/queries/0_stateless/02346_fulltext_index_search.sql +++ b/tests/queries/0_stateless/02346_fulltext_index_search.sql @@ -67,7 +67,7 @@ CREATE TABLE tab_x(k UInt64, s String, INDEX af(s) TYPE full_text()) ENGINE = MergeTree() ORDER BY k SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi'; 
-INSERT INTO tab_x VALUES (101, 'Alick a01'), (102, 'Blick a02'), (103, 'Click a03'), (104, 'Dlick a04'), (105, 'Elick a05'), (106, 'Alick a06'), (107, 'Blick a07'), (108, 'Click a08'), (109, 'Dlick a09'), (110, 'Elick a10'), (111, 'Alick b01'), (112, 'Blick b02'), (113, 'Click b03'), (114, 'Dlick b04'), (115, 'Elick b05'), (116, 'Alick b06'), (117, 'Blick b07'), (118, 'Click b08'), (119, 'Dlick b09'), (120, 'Elick b10'); +INSERT INTO tab_x VALUES (101, 'x Alick a01 y'), (102, 'x Blick a02 y'), (103, 'x Click a03 y'), (104, 'x Dlick a04 y'), (105, 'x Elick a05 y'), (106, 'x Alick a06 y'), (107, 'x Blick a07 y'), (108, 'x Click a08 y'), (109, 'x Dlick a09 y'), (110, 'x Elick a10 y'), (111, 'x Alick b01 y'), (112, 'x Blick b02 y'), (113, 'x Click b03 y'), (114, 'x Dlick b04 y'), (115, 'x Elick b05 y'), (116, 'x Alick b06 y'), (117, 'x Blick b07 y'), (118, 'x Click b08 y'), (119, 'x Dlick b09 y'), (120, 'x Elick b10 y'); -- check full_text index was created SELECT name, type FROM system.data_skipping_indices WHERE table == 'tab_x' AND database = currentDatabase() LIMIT 1; @@ -86,27 +86,27 @@ SELECT read_rows==8 from system.query_log LIMIT 1; -- search full_text index with IN operator -SELECT * FROM tab_x WHERE s IN ('Alick a01', 'Alick a06') ORDER BY k; +SELECT * FROM tab_x WHERE s IN ('x Alick a01 y', 'x Alick a06 y') ORDER BY k; -- check the query only read 2 granules (4 rows total; each granule has 2 rows) SYSTEM FLUSH LOGS; SELECT read_rows==4 from system.query_log WHERE query_kind ='Select' AND current_database = currentDatabase() - AND endsWith(trimRight(query), 'SELECT * FROM tab_x WHERE s IN (\'Alick a01\', \'Alick a06\') ORDER BY k;') + AND endsWith(trimRight(query), 'SELECT * FROM tab_x WHERE s IN (\'x Alick a01 y\', \'x Alick a06 y\') ORDER BY k;') AND type='QueryFinish' AND result_rows==2 LIMIT 1; -- search full_text index with multiSearch -SELECT * FROM tab_x WHERE multiSearchAny(s, ['a01', 'b01']) ORDER BY k; +SELECT * FROM tab_x WHERE multiSearchAny(s, 
[' a01 ', ' b01 ']) ORDER BY k; -- check the query only read 2 granules (4 rows total; each granule has 2 rows) SYSTEM FLUSH LOGS; SELECT read_rows==4 from system.query_log WHERE query_kind ='Select' AND current_database = currentDatabase() - AND endsWith(trimRight(query), 'SELECT * FROM tab_x WHERE multiSearchAny(s, [\'a01\', \'b01\']) ORDER BY k;') + AND endsWith(trimRight(query), 'SELECT * FROM tab_x WHERE multiSearchAny(s, [\' a01 \', \' b01 \']) ORDER BY k;') AND type='QueryFinish' AND result_rows==2 LIMIT 1; @@ -126,14 +126,14 @@ INSERT INTO tab SELECT rowNumberInBlock(), groupArray(s) FROM tab_x GROUP BY k%1 SELECT name, type FROM system.data_skipping_indices WHERE table == 'tab' AND database = currentDatabase() LIMIT 1; -- search full_text index with has -SELECT * FROM tab WHERE has(s, 'Click a03') ORDER BY k; +SELECT * FROM tab WHERE has(s, 'x Click a03 y') ORDER BY k; -- check the query must read all 10 granules (20 rows total; each granule has 2 rows) SYSTEM FLUSH LOGS; SELECT read_rows==2 from system.query_log WHERE query_kind ='Select' AND current_database = currentDatabase() - AND endsWith(trimRight(query), 'SELECT * FROM tab WHERE has(s, \'Click a03\') ORDER BY k;') + AND endsWith(trimRight(query), 'SELECT * FROM tab WHERE has(s, \'x Click a03 y\') ORDER BY k;') AND type='QueryFinish' AND result_rows==1 LIMIT 1; diff --git a/tests/queries/0_stateless/02366_kql_create_table.sql b/tests/queries/0_stateless/02366_kql_create_table.sql index b266679b06a..75a81c5dbd3 100644 --- a/tests/queries/0_stateless/02366_kql_create_table.sql +++ b/tests/queries/0_stateless/02366_kql_create_table.sql @@ -1,8 +1,8 @@ DROP TABLE IF EXISTS Customers; CREATE TABLE Customers -( +( FirstName Nullable(String), - LastName String, + LastName String, Occupation String, Education String, Age Nullable(UInt8) @@ -10,20 +10,20 @@ CREATE TABLE Customers INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd 
defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); Select '-- test create table --' ; -Select * from kql(Customers|project FirstName) limit 1;; +Select * from kql($$Customers|project FirstName$$) limit 1;; DROP TABLE IF EXISTS kql_table1; -CREATE TABLE kql_table1 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName | filter LastName=='Diaz'); +CREATE TABLE kql_table1 ENGINE = Memory AS select *, now() as new_column From kql($$Customers | project LastName | filter LastName=='Diaz'$$); select LastName from kql_table1 limit 1; DROP TABLE IF EXISTS kql_table2; CREATE TABLE kql_table2 -( +( FirstName Nullable(String), - LastName String, + LastName String, Age Nullable(UInt8) ) ENGINE = Memory; -INSERT INTO kql_table2 select * from kql(Customers|project FirstName,LastName,Age | filter FirstName=='Theodore'); +INSERT INTO kql_table2 select * from kql($$Customers|project FirstName,LastName,Age | filter FirstName=='Theodore'$$); select * from kql_table2 limit 1; --- select * from kql(Customers | where FirstName !in ("test", "test2")); +-- select * from kql($$Customers | where FirstName !in ("test", "test2")$$); DROP TABLE IF EXISTS Customers; DROP TABLE IF EXISTS kql_table1; -DROP TABLE IF EXISTS kql_table2; \ No newline at end of file +DROP TABLE IF EXISTS kql_table2; diff --git a/tests/queries/0_stateless/02375_pretty_formats.sql.j2 b/tests/queries/0_stateless/02375_pretty_formats.sql.j2 index 55462ea6b61..36a4479260c 100644 --- a/tests/queries/0_stateless/02375_pretty_formats.sql.j2 +++ b/tests/queries/0_stateless/02375_pretty_formats.sql.j2 @@ -3,6 +3,6 @@ 'PrettySpaceNoEscapesMonoBlock'] -%} select '{{ format }}'; -select number as x, number + 1 as y from numbers(4) settings max_block_size=2, output_format_pretty_color=1 format {{ format }}; +select number as x, 
number + 1 as y from numbers(4) settings max_block_size=2, output_format_pretty_color=1, output_format_pretty_display_footer_column_names=0 format {{ format }}; {% endfor -%} diff --git a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql index 0c228c13f19..1a584b8b5b2 100644 --- a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql +++ b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql @@ -1,5 +1,7 @@ -- Tags: no-random-merge-tree-settings +SET optimize_trivial_insert_select = 1; + drop table if exists test_02381; create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; insert into test_02381 select number, number * 10 from system.numbers limit 1000000; diff --git a/tests/queries/0_stateless/02381_parseDateTime64BestEffortUS.sql b/tests/queries/0_stateless/02381_parseDateTime64BestEffortUS.sql index 21dc7b1a990..510ed4b126f 100644 --- a/tests/queries/0_stateless/02381_parseDateTime64BestEffortUS.sql +++ b/tests/queries/0_stateless/02381_parseDateTime64BestEffortUS.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_display_footer_column_names=0; SELECT 'parseDateTime64BestEffortUS'; SELECT diff --git a/tests/queries/0_stateless/02391_recursive_buffer.sql b/tests/queries/0_stateless/02391_recursive_buffer.sql index 1a630722b5a..60a2f0d1af1 100644 --- a/tests/queries/0_stateless/02391_recursive_buffer.sql +++ b/tests/queries/0_stateless/02391_recursive_buffer.sql @@ -10,9 +10,5 @@ DROP TABLE test; DROP TABLE IF EXISTS test1; DROP TABLE IF EXISTS test2; CREATE TABLE test1 (key UInt32) Engine = Buffer(currentDatabase(), test2, 16, 10, 100, 10000, 1000000, 10000000, 100000000); -CREATE TABLE test2 (key UInt32) Engine = Buffer(currentDatabase(), test1, 16, 10, 100, 10000, 1000000, 10000000, 100000000); -SELECT * FROM test1; -- { 
serverError TOO_DEEP_RECURSION } -SELECT * FROM test2; -- { serverError TOO_DEEP_RECURSION } -SELECT * FROM system.tables WHERE table IN ('test1', 'test2') AND database = currentDatabase(); -- { serverError TOO_DEEP_RECURSION } +CREATE TABLE test2 (key UInt32) Engine = Buffer(currentDatabase(), test1, 16, 10, 100, 10000, 1000000, 10000000, 100000000); -- { serverError INFINITE_LOOP } DROP TABLE test1; -DROP TABLE test2; diff --git a/tests/queries/0_stateless/02403_big_http_chunk_size.python b/tests/queries/0_stateless/02403_big_http_chunk_size.python index 3213b8cd387..f74459489a5 100644 --- a/tests/queries/0_stateless/02403_big_http_chunk_size.python +++ b/tests/queries/0_stateless/02403_big_http_chunk_size.python @@ -16,7 +16,7 @@ def main(): sock.settimeout(60) s = "POST / HTTP/1.1\r\n" s += "Host: %s\r\n" % host - s += "Content-type: multipart/form-data\r\n" + s += "Content-type: multipart/form-data; boundary=--b3f1zid8kqwy\r\n" s += "Transfer-encoding: chunked\r\n" s += "\r\n" s += "ffffffffffffffff" diff --git a/tests/queries/0_stateless/02403_big_http_chunk_size.reference b/tests/queries/0_stateless/02403_big_http_chunk_size.reference index d7970bd2eb1..466ff9002e9 100644 --- a/tests/queries/0_stateless/02403_big_http_chunk_size.reference +++ b/tests/queries/0_stateless/02403_big_http_chunk_size.reference @@ -1,3 +1,3 @@ -HTTP/1.1 200 OK +HTTP/1.1 500 Internal Server Error encoding type chunked -error code 1000 +error code 69 diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.sql b/tests/queries/0_stateless/02404_memory_bound_merging.sql index 5e017e79309..a2de19dff8a 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.sql +++ b/tests/queries/0_stateless/02404_memory_bound_merging.sql @@ -7,6 +7,7 @@ drop table if exists t_different_dbs; drop table if exists dist_t; drop table if exists t; +set optimize_trivial_insert_select = 1; create table t(a UInt64, b UInt64) engine=MergeTree order by a; system stop merges t; diff --git 
a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql index cabcd230eb6..e9deb778075 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql @@ -3,7 +3,6 @@ SELECT name FROM system.functions WHERE NOT is_aggregate AND origin = 'System' AND alias_to = '' AND length(description) < 10 AND name NOT IN ( 'aes_decrypt_mysql', 'aes_encrypt_mysql', 'decrypt', 'encrypt', - 'base64Decode', 'base64Encode', 'tryBase64Decode', 'convertCharset', 'detectLanguage', 'detectLanguageMixed', 'geoToH3', diff --git a/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 index 0ec6b2ed144..d7cbf210506 100644 --- a/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 +++ b/tests/queries/0_stateless/02461_prewhere_row_level_policy_lightweight_delete.sql.j2 @@ -1,5 +1,7 @@ {% for index_granularity in [999, 1000, 1001, 9999, 10000, 10001] %} +SET optimize_trivial_insert_select = 1; + DROP TABLE IF EXISTS url_na_log; CREATE TABLE url_na_log(SiteId UInt32, DateVisit Date, PRIMARY KEY (SiteId)) diff --git a/tests/queries/0_stateless/02473_infile_progress.py b/tests/queries/0_stateless/02473_infile_progress.py index 9941736107f..4165eeb6d31 100755 --- a/tests/queries/0_stateless/02473_infile_progress.py +++ b/tests/queries/0_stateless/02473_infile_progress.py @@ -32,12 +32,12 @@ with client( ) client1.expect(prompt) client1.send(f"INSERT INTO test.infile_progress FROM INFILE '{filename}'") - client1.expect("Progress: 5.00 rows, 10.00 B.*\)") + client1.expect("Progress: 5.00 rows, 10.00 B.*\\)") client1.expect(prompt) # send Ctrl-C client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + match = client1.expect("(%s)|([#\\$] )" 
% prompt) if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference index 649b037fafa..11c178ac0d0 100644 --- a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference @@ -10,11 +10,15 @@ QUERY id: 0 JOIN TREE TABLE id: 3, alias: __table1, table_name: default.t_logical_expressions_optimizer_low_cardinality WHERE - FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 - COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 - CONSTANT id: 6, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) + FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: LowCardinality(UInt8) + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + CONSTANT id: 8, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) + CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT a FROM t_logical_expressions_optimizer_low_cardinality @@ -28,11 +32,15 @@ QUERY id: 0 JOIN TREE TABLE id: 3, alias: __table1, table_name: default.t_logical_expressions_optimizer_low_cardinality WHERE - FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: or, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 - COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 - CONSTANT id: 6, constant_value: Tuple_(\'x\', \'y\'), 
constant_value_type: Tuple(String, String) + FUNCTION id: 6, function_name: in, function_type: ordinary, result_type: LowCardinality(UInt8) + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + CONSTANT id: 8, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) + CONSTANT id: 9, constant_value: UInt64_0, constant_value_type: UInt8 SETTINGS allow_experimental_analyzer=1 SELECT a FROM t_logical_expressions_optimizer_low_cardinality @@ -46,11 +54,15 @@ QUERY id: 0 JOIN TREE TABLE id: 3, alias: __table1, table_name: default.t_logical_expressions_optimizer_low_cardinality WHERE - FUNCTION id: 4, function_name: notIn, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 - COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 - CONSTANT id: 6, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) + FUNCTION id: 6, function_name: notIn, function_type: ordinary, result_type: LowCardinality(UInt8) + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + CONSTANT id: 8, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) + CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String SETTINGS allow_experimental_analyzer=1 SELECT a FROM t_logical_expressions_optimizer_low_cardinality @@ -64,11 +76,15 @@ QUERY id: 0 JOIN TREE TABLE id: 3, alias: __table1, table_name: default.t_logical_expressions_optimizer_low_cardinality WHERE - FUNCTION id: 4, function_name: notIn, function_type: ordinary, result_type: UInt8 + FUNCTION id: 4, function_name: _CAST, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 5, nodes: 2 - COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 - CONSTANT id: 6, 
constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) + FUNCTION id: 6, function_name: notIn, function_type: ordinary, result_type: LowCardinality(UInt8) + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 2, column_name: a, result_type: LowCardinality(String), source_id: 3 + CONSTANT id: 8, constant_value: Tuple_(\'x\', \'y\'), constant_value_type: Tuple(String, String) + CONSTANT id: 9, constant_value: \'UInt8\', constant_value_type: String SETTINGS allow_experimental_analyzer=1 SELECT a FROM t_logical_expressions_optimizer_low_cardinality diff --git a/tests/queries/0_stateless/02488_zero_copy_detached_parts_drop_table.sh b/tests/queries/0_stateless/02488_zero_copy_detached_parts_drop_table.sh index b01f16e1cad..60cec5caea3 100755 --- a/tests/queries/0_stateless/02488_zero_copy_detached_parts_drop_table.sh +++ b/tests/queries/0_stateless/02488_zero_copy_detached_parts_drop_table.sh @@ -19,7 +19,7 @@ $CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is rm -f $path/count.txt $CLICKHOUSE_CLIENT -q "detach table rmt2 sync" -$CLICKHOUSE_CLIENT --send_logs_level='fatal' -q "attach table rmt2" +$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level='fatal' -q "attach table rmt2" $CLICKHOUSE_CLIENT -q "select reason, name from system.detached_parts where database='$CLICKHOUSE_DATABASE' and table='rmt2'" diff --git a/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference b/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference index 01d7fa2a2cb..5eb4670f3cf 100644 --- a/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference +++ b/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.reference @@ -18,3 +18,73 @@ QUERY id: 0 LIST id: 9, nodes: 1 CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 1 +QUERY id: 0 + PROJECTION COLUMNS + uniqCombined((materialize((number)))) UInt64 
+ PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: uniqCombined, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + JOIN TREE + TABLE_FUNCTION id: 5, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 6, nodes: 1 + CONSTANT id: 7, constant_value: UInt64_10, constant_value_type: UInt8 +10 +QUERY id: 0 + PROJECTION COLUMNS + uniq(abs(number)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: uniq, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: abs, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: UInt64_10, constant_value_type: UInt8 +QUERY id: 0 + PROJECTION COLUMNS + uniq(toString(abs(materialize(number)))) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: uniq, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: abs, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: materialize, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 10, nodes: 1 + CONSTANT id: 11, constant_value: UInt64_10, constant_value_type: UInt8 +QUERY id: 0 + PROJECTION COLUMNS + uniq((number, 1)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: uniq, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION 
id: 4, function_name: tuple, function_type: ordinary, result_type: Tuple(UInt64, UInt8) + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_1, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 9, nodes: 1 + CONSTANT id: 10, constant_value: UInt64_10, constant_value_type: UInt8 diff --git a/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.sql b/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.sql index 830db274678..5a3b2379fde 100644 --- a/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.sql +++ b/tests/queries/0_stateless/02493_analyzer_uniq_injective_functions_elimination.sql @@ -1,5 +1,14 @@ -SET allow_experimental_analyzer = 1; +SET allow_experimental_analyzer = 1, optimize_injective_functions_inside_uniq = 1; +-- Simple test EXPLAIN QUERY TREE SELECT uniqCombined(tuple('')) FROM numbers(1); - SELECT uniqCombined(tuple('')) FROM numbers(1); + +-- Test with chain of injective functions +EXPLAIN QUERY TREE SELECT uniqCombined(tuple(materialize(tuple(number)))) FROM numbers(10); +SELECT uniqCombined(tuple(materialize(toString(number)))) FROM numbers(10); + +-- No or partial optimization cases +EXPLAIN QUERY TREE SELECT uniq(abs(number)) FROM numbers(10); -- no elimination as `abs` is not injective +EXPLAIN QUERY TREE SELECT uniq(toString(abs(materialize(number)))) FROM numbers(10); -- only eliminate `toString` +EXPLAIN QUERY TREE SELECT uniq(tuple(number, 1)) FROM numbers(10); -- no elimination as `tuple` has multiple arguments diff --git a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh index 6bc3d03ac66..24803ed7420 100755 --- a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh +++ 
b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh @@ -20,12 +20,12 @@ SETTINGS_ANALYZER="SETTINGS use_query_cache=1, max_threads=1, allow_experimental # Verify that the first query does two aggregations and the second query zero aggregations. Since query cache is currently not integrated # with EXPLAIN PLAN, we need to check the logs. -${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_NO_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l -${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_NO_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l +${CLICKHOUSE_CLIENT} --allow_repeated_settings --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_NO_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l +${CLICKHOUSE_CLIENT} --allow_repeated_settings --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_NO_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP QUERY CACHE" -${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l -${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l +${CLICKHOUSE_CLIENT} --allow_repeated_settings --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l +${CLICKHOUSE_CLIENT} --allow_repeated_settings --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_ANALYZER" 2>&1 | grep "Aggregated. 
" | wc -l ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP QUERY CACHE" diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 77ef213b36d..4a4e898c5bd 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -332,13 +332,12 @@ SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it d Expression (Projection) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) - Filter ((WHERE + (Projection + Before ORDER BY))) - Filter (HAVING) - Aggregating - Expression ((Before GROUP BY + Projection)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - ReadFromSystemNumbers + Filter (((WHERE + (Projection + Before ORDER BY)) + HAVING)) + Aggregating + Expression ((Before GROUP BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromSystemNumbers -- execute 1 2 diff --git a/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh b/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh index aa1f635f380..be87ea057ca 100755 --- a/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh +++ b/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "create table t(ts DateTime64) engine=MergeTree order by ts as select * from numbers_mt(1e6);" +$CLICKHOUSE_CLIENT --optimize_trivial_insert_select 1 -q "create table t(ts DateTime64) engine=MergeTree order by ts as select * from numbers_mt(1e6);" max_block_size=8192 diff --git a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql index a90e56a9822..55723360c38 100644 --- a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql +++ b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql @@ -6,6 +6,7 @@ set max_threads = 16; set allow_aggregate_partitions_independently = 1; set force_aggregate_partitions_independently = 1; set optimize_use_projections = 0; +set optimize_trivial_insert_select = 1; set allow_prefetched_read_pool_for_remote_filesystem = 0; set allow_prefetched_read_pool_for_local_filesystem = 0; diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.reference b/tests/queries/0_stateless/02530_dictionaries_update_field.reference index 88c910e0313..4d5a7447a49 100644 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.reference +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.reference @@ -10,7 +10,7 @@ SELECT key, value FROM dict_flat ORDER BY key ASC; 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 SecondUpdated @@ -27,7 +27,7 @@ SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', 
now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 SecondUpdated @@ -44,7 +44,7 @@ SELECT key, value FROM dict_hashed ORDER BY key ASC; 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 SecondUpdated @@ -61,7 +61,7 @@ SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated @@ -78,7 +78,7 @@ SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; SELECT key, value 
FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 SecondUpdated @@ -95,7 +95,7 @@ SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.sh b/tests/queries/0_stateless/02530_dictionaries_update_field.sh index 35881bdf896..9ced78a1196 100755 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.sh +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.sh @@ -60,7 +60,7 @@ for layout in "${layouts[@]}"; do INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); - SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; + SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; SELECT key, value FROM $dictionary_name ORDER BY key ASC; -- { echoOff } diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh index 9850406eb3a..dccb680be42 100755 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh @@ -30,7 +30,7 @@ run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas $CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" -$CLICKHOUSE_CLIENT 
--query="CREATE TABLE 02535_custom_key (x String, y Int32) ENGINE = MergeTree ORDER BY cityHash64(x)" +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String, y UInt32) ENGINE = MergeTree ORDER BY cityHash64(x)" $CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key SELECT toString(number), number % 3 FROM numbers(1000)" function run_count_with_custom_key { @@ -41,6 +41,6 @@ run_count_with_custom_key "y" run_count_with_custom_key "cityHash64(y)" run_count_with_custom_key "cityHash64(y) + 1" -$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) as t1 JOIN 02535_custom_key USING y" --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" +$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) as t1 JOIN 02535_custom_key USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" $CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference index 9bb0c022752..70bcd7f255b 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference @@ -29,20 +29,16 @@ WHERE type_1 = \'all\' ExpressionTransform × 2 (Filter) FilterTransform × 2 - (Filter) - FilterTransform × 2 - (Filter) - FilterTransform × 2 - (Aggregating) - ExpressionTransform × 2 - AggregatingTransform × 2 - Copy 1 → 2 - (Expression) - ExpressionTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Aggregating) + 
ExpressionTransform × 2 + AggregatingTransform × 2 + Copy 1 → 2 + (Expression) + ExpressionTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Filter) @@ -68,14 +64,10 @@ ExpressionTransform × 2 ExpressionTransform × 2 AggregatingTransform × 2 Copy 1 → 2 - (Filter) - FilterTransform - (Filter) - FilterTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Aggregating) diff --git a/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference b/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference index dd107065380..d391c365ea7 100644 --- a/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference +++ b/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference @@ -1,10 +1,11 @@ Expression ((Project names + (Projection + ))) Header: number UInt64 Actions: INPUT : 0 -> __table2.number UInt64 : 0 - ALIAS __table2.number :: 0 -> number UInt64 : 1 - ALIAS number :: 1 -> __table1.number UInt64 : 0 - ALIAS __table1.number :: 0 -> number UInt64 : 1 -Positions: 1 + INPUT :: 1 -> ignore(2_UInt8) UInt8 : 1 + ALIAS __table2.number :: 0 -> number UInt64 : 2 + ALIAS number :: 2 -> __table1.number UInt64 : 0 + ALIAS __table1.number :: 0 -> number UInt64 : 2 +Positions: 2 Sorting (Sorting for ORDER BY) Header: ignore(2_UInt8) UInt8 __table2.number UInt64 diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql index ff8b9c71e92..741d0177971 100644 --- 
a/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql +++ b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-debug, no-tsan, no-asan, no-ubsan, no-msan, no-parallel +-- Tags: long, no-debug, no-tsan, no-asan, no-ubsan, no-msan, no-parallel, no-sanitize-coverage -- no-parallel because the sets use a lot of memory, which may interfere with other tests diff --git a/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference index e6c4d5768af..6e8325a2ff3 100644 --- a/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference +++ b/tests/queries/0_stateless/02675_predicate_push_down_filled_join_fix.reference @@ -24,10 +24,10 @@ Positions: 3 0 1 Actions: INPUT : 0 -> id UInt64 : 0 INPUT : 1 -> value String : 1 COLUMN Const(UInt8) -> 0_UInt8 UInt8 : 2 - ALIAS id :: 0 -> __table1.id UInt64 : 3 - ALIAS value :: 1 -> __table1.value String : 0 - FUNCTION equals(__table1.id : 3, 0_UInt8 :: 2) -> equals(__table1.id, 0_UInt8) UInt8 : 1 - Positions: 1 3 0 + ALIAS id : 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 4 + FUNCTION equals(id :: 0, 0_UInt8 :: 2) -> equals(__table1.id, 0_UInt8) UInt8 : 1 + Positions: 1 3 4 ReadFromMergeTree (default.test_table) Header: id UInt64 value String diff --git a/tests/queries/0_stateless/02725_agg_projection_resprect_PK.sql b/tests/queries/0_stateless/02725_agg_projection_resprect_PK.sql index a2355f78f4c..459ebc1bc22 100644 --- a/tests/queries/0_stateless/02725_agg_projection_resprect_PK.sql +++ b/tests/queries/0_stateless/02725_agg_projection_resprect_PK.sql @@ -20,6 +20,7 @@ CREATE TABLE t0 ) ENGINE = MergeTree ORDER BY (c1, c2) settings min_bytes_for_wide_part = 10485760, min_rows_for_wide_part = 0; +SET optimize_trivial_insert_select = 1; INSERT INTO t0 SELECT number, -number, diff --git 
a/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh b/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh index 912cdd3d1e8..c69c635f6ed 100755 --- a/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh +++ b/tests/queries/0_stateless/02789_filesystem_cache_alignment.sh @@ -14,6 +14,7 @@ SETTINGS disk = disk(type = cache, max_size = '1Gi', max_file_segment_size = '40Mi', boundary_alignment = '20Mi', + background_download_threads = 2, path = '$CLICKHOUSE_TEST_UNIQUE_NAME', disk = 's3_disk'); diff --git a/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql index 5b9976714ea..b0c08134816 100644 --- a/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql +++ b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_display_footer_column_names=0; SET output_format_pretty_color=1; SET read_in_order_two_level_merge_threshold=1000000; diff --git a/tests/queries/0_stateless/02859_replicated_db_name_zookeeper.reference b/tests/queries/0_stateless/02859_replicated_db_name_zookeeper.reference new file mode 100644 index 00000000000..e7d63a6add3 --- /dev/null +++ b/tests/queries/0_stateless/02859_replicated_db_name_zookeeper.reference @@ -0,0 +1,2 @@ +rdb1_default 1 +rdb3_default 1 diff --git a/tests/queries/0_stateless/02859_replicated_db_name_zookeeper.sh b/tests/queries/0_stateless/02859_replicated_db_name_zookeeper.sh new file mode 100755 index 00000000000..3c14c569257 --- /dev/null +++ b/tests/queries/0_stateless/02859_replicated_db_name_zookeeper.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "CREATE DATABASE rdb1_$CLICKHOUSE_DATABASE ON CLUSTER test_shard_localhost ENGINE=Replicated('/clickhouse/databases/{uuid}', '{shard}', '{replica}')"; +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "CREATE DATABASE rdb2_$CLICKHOUSE_DATABASE ON CLUSTER test_shard_localhost ENGINE=Replicated('/clickhouse/databases/{uuid}', '{shard}', '{replica}')"; +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "RENAME DATABASE rdb2_$CLICKHOUSE_DATABASE to rdb3_$CLICKHOUSE_DATABASE ON CLUSTER test_shard_localhost" + +$CLICKHOUSE_CLIENT -q " +SELECT + db_name, + t1.uuid = t2.uuid +FROM +( + WITH '/clickhouse/databases/' AS prefix + SELECT + toUUID(substr(path, length(prefix) + 1)) AS uuid, + value AS db_name + FROM system.zookeeper + WHERE (path IN ( + SELECT concat(path, name) + FROM system.zookeeper + WHERE path = prefix + )) AND (name = 'first_replica_database_name') +) AS t1 +INNER JOIN system.databases AS t2 USING (uuid) +WHERE db_name like '%$CLICKHOUSE_DATABASE%' +ORDER BY db_name +" + +$CLICKHOUSE_CLIENT -q "DROP DATABASE rdb1_$CLICKHOUSE_DATABASE" +$CLICKHOUSE_CLIENT -q "DROP DATABASE rdb3_$CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02864_statistic_exception.sql b/tests/queries/0_stateless/02864_statistic_exception.sql deleted file mode 100644 index 092fa9bda85..00000000000 --- a/tests/queries/0_stateless/02864_statistic_exception.sql +++ /dev/null @@ -1,53 +0,0 @@ -DROP TABLE IF EXISTS t1; - -CREATE TABLE t1 -( - a Float64 STATISTIC(tdigest), - b Int64 STATISTIC(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } - -SET allow_experimental_statistic = 1; - -CREATE TABLE t1 -( - a Float64 STATISTIC(tdigest), - b Int64, - pk String STATISTIC(tdigest), -) Engine = MergeTree() ORDER BY pk; -- { serverError ILLEGAL_STATISTIC } - -CREATE TABLE t1 -( - a Float64 STATISTIC(tdigest, tdigest(10)), - b Int64, -) Engine 
= MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } - -CREATE TABLE t1 -( - a Float64 STATISTIC(xyz), - b Int64, -) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } - -CREATE TABLE t1 -( - a Float64, - b Int64, - pk String, -) Engine = MergeTree() ORDER BY pk; - -ALTER TABLE t1 ADD STATISTIC a TYPE xyz; -- { serverError INCORRECT_QUERY } -ALTER TABLE t1 ADD STATISTIC a TYPE tdigest; -ALTER TABLE t1 ADD STATISTIC a TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } -ALTER TABLE t1 ADD STATISTIC pk TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } -ALTER TABLE t1 DROP STATISTIC b TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } -ALTER TABLE t1 DROP STATISTIC a TYPE tdigest; -ALTER TABLE t1 DROP STATISTIC a TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } -ALTER TABLE t1 CLEAR STATISTIC a TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } -ALTER TABLE t1 MATERIALIZE STATISTIC b TYPE tdigest; -- { serverError ILLEGAL_STATISTIC } - -ALTER TABLE t1 ADD STATISTIC a TYPE tdigest; -ALTER TABLE t1 ADD STATISTIC b TYPE tdigest; -ALTER TABLE t1 MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; -ALTER TABLE t1 MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } - -DROP TABLE t1; diff --git a/tests/queries/0_stateless/02864_statistics_exception.reference b/tests/queries/0_stateless/02864_statistics_exception.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02864_statistics_exception.sql b/tests/queries/0_stateless/02864_statistics_exception.sql new file mode 100644 index 00000000000..c531d39cd69 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_exception.sql @@ -0,0 +1,57 @@ +DROP TABLE IF EXISTS t1; + +CREATE TABLE t1 +( + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest), + pk String, +) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + +SET allow_experimental_statistics = 1; + +CREATE TABLE t1 +( + a Float64 
STATISTICS(tdigest), + b Int64, + pk String STATISTICS(tdigest), +) Engine = MergeTree() ORDER BY pk; -- { serverError ILLEGAL_STATISTICS } + +CREATE TABLE t1 +( + a Float64 STATISTICS(tdigest, tdigest(10)), + b Int64, +) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + +CREATE TABLE t1 +( + a Float64 STATISTICS(xyz), + b Int64, +) Engine = MergeTree() ORDER BY pk; -- { serverError INCORRECT_QUERY } + +CREATE TABLE t1 +( + a Float64, + b Int64, + pk String, +) Engine = MergeTree() ORDER BY pk; + +ALTER TABLE t1 ADD STATISTICS a TYPE xyz; -- { serverError INCORRECT_QUERY } +ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; +ALTER TABLE t1 ADD STATISTICS IF NOT EXISTS a TYPE tdigest; +ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +-- Statistics can be created only on integer columns +ALTER TABLE t1 MODIFY STATISTICS a TYPE tdigest; +ALTER TABLE t1 ADD STATISTICS pk TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE t1 DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE t1 DROP STATISTICS a; +ALTER TABLE t1 DROP STATISTICS IF EXISTS a; +ALTER TABLE t1 CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE t1 CLEAR STATISTICS IF EXISTS a; +ALTER TABLE t1 MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } + +ALTER TABLE t1 ADD STATISTICS a TYPE tdigest; +ALTER TABLE t1 ADD STATISTICS b TYPE tdigest; +ALTER TABLE t1 MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; +ALTER TABLE t1 MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } + +DROP TABLE t1; diff --git a/tests/queries/0_stateless/02864_statistic_operate.reference b/tests/queries/0_stateless/02864_statistics_operate.reference similarity index 58% rename from tests/queries/0_stateless/02864_statistic_operate.reference rename to tests/queries/0_stateless/02864_statistics_operate.reference index 3e291485031..6398a9bd000 100644 --- 
a/tests/queries/0_stateless/02864_statistic_operate.reference +++ b/tests/queries/0_stateless/02864_statistics_operate.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.t1\n(\n `a` Float64 STATISTIC(tdigest),\n `b` Int64 STATISTIC(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 After insert Prewhere info Prewhere filter @@ -12,7 +12,7 @@ After drop statistic 10 CREATE TABLE default.t1\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 After add statistic -CREATE TABLE default.t1\n(\n `a` Float64 STATISTIC(tdigest),\n `b` Int64 STATISTIC(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 After materialize statistic Prewhere info Prewhere filter @@ -23,7 +23,7 @@ After merge Prewhere filter Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) 20 -CREATE TABLE default.t1\n(\n `a` Float64 STATISTIC(tdigest),\n `c` Int64 STATISTIC(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 After rename Prewhere info Prewhere filter diff --git a/tests/queries/0_stateless/02864_statistic_operate.sql 
b/tests/queries/0_stateless/02864_statistics_operate.sql similarity index 87% rename from tests/queries/0_stateless/02864_statistic_operate.sql rename to tests/queries/0_stateless/02864_statistics_operate.sql index 5f1c30f8eec..bf69c11bc91 100644 --- a/tests/queries/0_stateless/02864_statistic_operate.sql +++ b/tests/queries/0_stateless/02864_statistics_operate.sql @@ -1,12 +1,12 @@ DROP TABLE IF EXISTS t1; -SET allow_experimental_statistic = 1; -SET allow_statistic_optimize = 1; +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; CREATE TABLE t1 ( - a Float64 STATISTIC(tdigest), - b Int64 STATISTIC(tdigest), + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest), pk String, ) Engine = MergeTree() ORDER BY pk SETTINGS min_bytes_for_wide_part = 0; @@ -20,7 +20,7 @@ SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions= SELECT count(*) FROM t1 WHERE b < 10 and a < 10; SELECT count(*) FROM t1 WHERE b < NULL and a < '10'; -ALTER TABLE t1 DROP STATISTIC a, b TYPE tdigest; +ALTER TABLE t1 DROP STATISTICS a, b; SELECT 'After drop statistic'; SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; @@ -28,13 +28,13 @@ SELECT count(*) FROM t1 WHERE b < 10 and a < 10; SHOW CREATE TABLE t1; -ALTER TABLE t1 ADD STATISTIC a, b TYPE tdigest; +ALTER TABLE t1 ADD STATISTICS a, b TYPE tdigest; SELECT 'After add statistic'; SHOW CREATE TABLE t1; -ALTER TABLE t1 MATERIALIZE STATISTIC a, b TYPE tdigest; +ALTER TABLE t1 MATERIALIZE STATISTICS a, b; INSERT INTO t1 select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; SELECT 'After materialize statistic'; diff --git a/tests/queries/0_stateless/02864_statistics_uniq.reference b/tests/queries/0_stateless/02864_statistics_uniq.reference new file mode 100644 index 00000000000..77786dbdd8c --- /dev/null +++ 
b/tests/queries/0_stateless/02864_statistics_uniq.reference @@ -0,0 +1,35 @@ +CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest, uniq),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After insert + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(c, 11), less(a, 10), less(b, 10)) (removed) +After merge + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(c, 11), less(a, 10), less(b, 10)) (removed) +After modify TDigest + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), equals(c, 11), less(b, 10)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(less(c, -1), less(a, 10), less(b, 10)) (removed) +After drop + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), equals(c, 11), less(b, 10)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(c, -1), less(b, 10)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_uniq.sql b/tests/queries/0_stateless/02864_statistics_uniq.sql new file mode 100644 index 00000000000..d496392668b --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_uniq.sql @@ -0,0 +1,72 @@ +DROP TABLE IF EXISTS t1; + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; +SET mutations_sync = 1; + +CREATE TABLE t1 +( + a Float64 STATISTICS(tdigest), + b Int64 
STATISTICS(tdigest), + c Int64 STATISTICS(tdigest, uniq), + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; + +SHOW CREATE TABLE t1; + +INSERT INTO t1 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; +INSERT INTO t1 select 0, 0, 11, generateUUIDv4(); + +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +OPTIMIZE TABLE t1 FINAL; + +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +SELECT 'After modify TDigest'; +ALTER TABLE t1 MODIFY STATISTICS c TYPE TDigest; +ALTER TABLE t1 MATERIALIZE STATISTICS c; + +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c < -1 and a < 10) WHERE explain LIKE '%Prewhere%' 
OR explain LIKE '%Filter column%'; + + +ALTER TABLE t1 DROP STATISTICS c; + +SELECT 'After drop'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c < -1 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +SET allow_suspicious_low_cardinality_types=1; +CREATE TABLE t2 +( + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest), + c LowCardinality(Int64) STATISTICS(tdigest, uniq), + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; +INSERT INTO t2 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; + +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; + +CREATE TABLE t3 +( + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest), + c Nullable(Int64) STATISTICS(tdigest, uniq), + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; +INSERT INTO t3 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; + +DROP TABLE IF EXISTS t3; diff --git a/tests/queries/0_stateless/02867_page_cache.reference b/tests/queries/0_stateless/02867_page_cache.reference deleted file mode 100644 index c3d6484a175..00000000000 --- a/tests/queries/0_stateless/02867_page_cache.reference +++ /dev/null @@ -1,21 +0,0 @@ -cold read 54975576145920 -PageCacheBytesUnpinnedRoundedToHugePages 1 -PageCacheBytesUnpinnedRoundedToPages 1 -PageCacheChunkMisses 1 
-ReadBufferFromS3Bytes 1 -repeat read 1 54975576145920 -PageCacheBytesUnpinnedRoundedToHugePages 1 -PageCacheBytesUnpinnedRoundedToPages 1 -PageCacheChunkDataHits 1 -dropped and bypassed cache 54975576145920 -PageCacheChunkMisses 1 -ReadBufferFromS3Bytes 1 -repeat read 2 54975576145920 -PageCacheBytesUnpinnedRoundedToHugePages 1 -PageCacheBytesUnpinnedRoundedToPages 1 -PageCacheChunkMisses 1 -ReadBufferFromS3Bytes 1 -repeat read 3 54975576145920 -PageCacheBytesUnpinnedRoundedToHugePages 1 -PageCacheBytesUnpinnedRoundedToPages 1 -PageCacheChunkDataHits 1 diff --git a/tests/queries/0_stateless/02867_page_cache.sql b/tests/queries/0_stateless/02867_page_cache.sql deleted file mode 100644 index f1882de4af6..00000000000 --- a/tests/queries/0_stateless/02867_page_cache.sql +++ /dev/null @@ -1,106 +0,0 @@ --- Tags: no-fasttest, no-parallel --- no-fasttest because we need an S3 storage policy --- no-parallel because we look at server-wide counters about page cache usage - -set use_page_cache_for_disks_without_file_cache = 1; -set page_cache_inject_eviction = 0; -set enable_filesystem_cache = 0; -set use_uncompressed_cache = 0; - -create table events_snapshot engine Memory as select * from system.events; -create view events_diff as - -- round all stats to 70 MiB to leave a lot of leeway for overhead - with if(event like '%Bytes%', 70*1024*1024, 35) as granularity, - -- cache hits counter can vary a lot depending on other settings: - -- e.g. 
if merge_tree_min_bytes_for_concurrent_read is small, multiple threads will read each chunk - -- so we just check that the value is not too low - if(event in ( - 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages', - 'PageCacheChunkDataHits'), 1, 1000) as clamp - select event, min2(intDiv(new.value - old.value, granularity), clamp) as diff - from system.events new - left outer join events_snapshot old - on old.event = new.event - where diff != 0 and - event in ( - 'ReadBufferFromS3Bytes', 'PageCacheChunkMisses', 'PageCacheChunkDataMisses', - 'PageCacheChunkDataHits', 'PageCacheChunkDataPartialHits', - 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages') - order by event; - -drop table if exists page_cache_03055; -create table page_cache_03055 (k Int64 CODEC(NONE)) engine MergeTree order by k settings storage_policy = 's3_cache'; - --- Write an 80 MiB file (40 x 2 MiB chunks), and a few small files. -system stop merges page_cache_03055; -insert into page_cache_03055 select * from numbers(10485760) settings max_block_size=100000000, preferred_block_size_bytes=1000000000; - -select * from events_diff; -truncate table events_snapshot; -insert into events_snapshot select * from system.events; - -system start merges page_cache_03055; -optimize table page_cache_03055 final; -truncate table events_snapshot; -insert into events_snapshot select * from system.events; - --- Cold read, should miss cache. (Populating cache on write is not implemented yet.) - -select 'cold read', sum(k) from page_cache_03055; - -select * from events_diff where event not in ('PageCacheChunkDataHits'); -truncate table events_snapshot; -insert into events_snapshot select * from system.events; - --- Repeat read, should hit cache. 
- -select 'repeat read 1', sum(k) from page_cache_03055; - -select * from events_diff; -truncate table events_snapshot; -insert into events_snapshot select * from system.events; - --- Drop cache and read again, should miss. Also don't write to cache. - -system drop page cache; - -select 'dropped and bypassed cache', sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1; - --- Data could be read multiple times because we're not writing to cache. --- (Not checking PageCacheBytesUnpinned* because it's unreliable in this case because of an intentional race condition, see PageCache::evictChunk.) -select event, if(event in ('PageCacheChunkMisses', 'ReadBufferFromS3Bytes'), diff >= 1, diff) from events_diff where event not in ('PageCacheChunkDataHits', 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages'); -truncate table events_snapshot; -insert into events_snapshot select * from system.events; - --- Repeat read, should still miss, but populate cache. - -select 'repeat read 2', sum(k) from page_cache_03055; - -select * from events_diff where event not in ('PageCacheChunkDataHits'); -truncate table events_snapshot; -insert into events_snapshot select * from system.events; - --- Read again, hit the cache. - -select 'repeat read 3', sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1; - -select * from events_diff; -truncate table events_snapshot; -insert into events_snapshot select * from system.events; - - --- Known limitation: cache is not invalidated if a table is dropped and created again at the same path. 
--- set allow_deprecated_database_ordinary=1; --- create database test_03055 engine = Ordinary; --- create table test_03055.t (k Int64) engine MergeTree order by k settings storage_policy = 's3_cache'; --- insert into test_03055.t values (1); --- select * from test_03055.t; --- drop table test_03055.t; --- create table test_03055.t (k Int64) engine MergeTree order by k settings storage_policy = 's3_cache'; --- insert into test_03055.t values (2); --- select * from test_03055.t; - - -drop table events_snapshot; -drop table page_cache_03055; -drop view events_diff; diff --git a/tests/queries/0_stateless/02870_move_partition_to_volume_io_throttling.sql b/tests/queries/0_stateless/02870_move_partition_to_volume_io_throttling.sql index b03d9849a80..2d76ab0d8e3 100644 --- a/tests/queries/0_stateless/02870_move_partition_to_volume_io_throttling.sql +++ b/tests/queries/0_stateless/02870_move_partition_to_volume_io_throttling.sql @@ -2,6 +2,8 @@ -- Tag: no-fasttest -- requires S3 -- Tag: no-replicated-database -- ALTER MOVE PARTITION TO should not be replicated (will be fixed separatelly) +SET optimize_trivial_insert_select = 1; + CREATE TABLE test_move_partition_throttling (key UInt64 CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple() SETTINGS storage_policy='local_remote'; INSERT INTO test_move_partition_throttling SELECT number FROM numbers(1e6); SELECT disk_name, partition, rows FROM system.parts WHERE database = currentDatabase() AND table = 'test_move_partition_throttling' and active; diff --git a/tests/queries/0_stateless/02882_clickhouse_keeper_client_no_confirmation.sh b/tests/queries/0_stateless/02882_clickhouse_keeper_client_no_confirmation.sh index 4bda0cfa5b0..43f86b8a58a 100755 --- a/tests/queries/0_stateless/02882_clickhouse_keeper_client_no_confirmation.sh +++ b/tests/queries/0_stateless/02882_clickhouse_keeper_client_no_confirmation.sh @@ -6,8 +6,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) path="/test-keeper-client-$CLICKHOUSE_DATABASE" 
-$CLICKHOUSE_KEEPER_CLIENT -q "rm $path" >& /dev/null +$CLICKHOUSE_KEEPER_CLIENT -q "rm '$path'" >& /dev/null -$CLICKHOUSE_KEEPER_CLIENT -q "create $path 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "rmr $path" -$CLICKHOUSE_KEEPER_CLIENT -q "get $path" 2>&1 +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "rmr '$path'" +$CLICKHOUSE_KEEPER_CLIENT -q "get '$path'" 2>&1 diff --git a/tests/queries/0_stateless/02883_zookeeper_finalize_stress.sh b/tests/queries/0_stateless/02883_zookeeper_finalize_stress.sh index dc7d67fbdd4..c883cd8f58a 100755 --- a/tests/queries/0_stateless/02883_zookeeper_finalize_stress.sh +++ b/tests/queries/0_stateless/02883_zookeeper_finalize_stress.sh @@ -7,4 +7,4 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -yes /keeper/api_version | head -n1000 | xargs -P30 -i $CLICKHOUSE_KEEPER_CLIENT -q 'get {}' > /dev/null +yes /keeper/api_version | head -n1000 | xargs -P30 -i $CLICKHOUSE_KEEPER_CLIENT -q "get '{}'" > /dev/null diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference index 0589fdeef04..a03343c8cb3 100644 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference @@ -35,5 +35,8 @@ OK 2 2 6 6 9 9 +===== TestInsertChain ===== 1000 1000 +===== TestOnCluster ===== +1 diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index f32aee44bee..dd869cd9988 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -260,6 +260,8 @@ EOF ${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM 
$db.test_mv_row_2" +echo "===== TestInsertChain =====" + ${CLICKHOUSE_CLIENT} --multiquery < foo.seq SETTINGS final = 1; + +-- Same problem possible can happen with array join +DROP TABLE IF EXISTS t; CREATE TABLE t (k1 UInt64, k2 UInt64, v UInt64) ENGINE = ReplacingMergeTree() ORDER BY (k1, k2); SET optimize_on_insert = 0; INSERT INTO t VALUES (1, 2, 3) (1, 2, 4) (2, 3, 4), (2, 3, 5); diff --git a/tests/queries/0_stateless/02896_cyclic_aliases_crash.reference b/tests/queries/0_stateless/02896_cyclic_aliases_crash.reference index caf11f5c15a..e537236478d 100644 --- a/tests/queries/0_stateless/02896_cyclic_aliases_crash.reference +++ b/tests/queries/0_stateless/02896_cyclic_aliases_crash.reference @@ -1,2 +1,3 @@ 1 2 3 1 5 +300 diff --git a/tests/queries/0_stateless/02896_cyclic_aliases_crash.sql b/tests/queries/0_stateless/02896_cyclic_aliases_crash.sql index 5fb628eeb67..5440872e052 100644 --- a/tests/queries/0_stateless/02896_cyclic_aliases_crash.sql +++ b/tests/queries/0_stateless/02896_cyclic_aliases_crash.sql @@ -30,3 +30,7 @@ WHERE (time_stamp_utc >= toDateTime('2024-04-25 00:00:00')) AND (time_stamp_utc GROUP BY time_stamp_utc ORDER BY Impressions DESC LIMIT 1000; + +drop table test_table; +create table test_table engine MergeTree order by sum as select 100 as sum union all select 200 as sum; +select sum as sum from (select sum(sum) as sum from test_table); diff --git a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference index 90c5e0e99a5..d4191af1594 100644 --- a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference +++ b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference @@ -1,2 +1,2 @@ -1048576 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection_sql 5 5000 0 16 -1048576 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection 5 5000 0 16 +1048576 10000000 33554432 
4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection_sql 0 5000 0 16 +1048576 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection 0 5000 0 16 diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh index 144831a2cdc..a247c99a818 100755 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel, no-fasttest +# Tags: long, zookeeper, no-parallel, no-fasttest, no-asan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02915_analyzer_fuzz_1.reference b/tests/queries/0_stateless/02915_analyzer_fuzz_1.reference new file mode 100644 index 00000000000..ac3f57c1a2e --- /dev/null +++ b/tests/queries/0_stateless/02915_analyzer_fuzz_1.reference @@ -0,0 +1 @@ +With ba\0 diff --git a/tests/queries/0_stateless/02915_analyzer_fuzz_1.sql b/tests/queries/0_stateless/02915_analyzer_fuzz_1.sql new file mode 100644 index 00000000000..94849453063 --- /dev/null +++ b/tests/queries/0_stateless/02915_analyzer_fuzz_1.sql @@ -0,0 +1,2 @@ +set allow_experimental_analyzer=1; +SELECT concat('With ', materialize(_CAST('ba\0', 'LowCardinality(FixedString(3))'))) AS `concat('With ', materialize(CAST('ba\\0', 'LowCardinality(FixedString(3))')))` FROM system.one GROUP BY 'With '; diff --git a/tests/queries/0_stateless/02918_optimize_count_for_merge_tables.reference b/tests/queries/0_stateless/02918_optimize_count_for_merge_tables.reference index 786a6b3bf25..7278018f1d6 100644 --- a/tests/queries/0_stateless/02918_optimize_count_for_merge_tables.reference +++ b/tests/queries/0_stateless/02918_optimize_count_for_merge_tables.reference @@ -7,6 +7,9 @@ Expression ((Projection + Before ORDER BY)) Aggregating Expression (Before 
GROUP BY) ReadFromMerge - ReadFromMergeTree (default.mt1) - ReadFromMergeTree (default.mt2) - ReadFromStorage (TinyLog) + Expression + ReadFromMergeTree (default.mt1) + Expression + ReadFromMergeTree (default.mt2) + Expression + ReadFromStorage (TinyLog) diff --git a/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh b/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh index bb013dccb65..d1cbc54d294 100755 --- a/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh +++ b/tests/queries/0_stateless/02922_deduplication_with_zero_copy.sh @@ -31,11 +31,11 @@ create table r2 (n int) function get_shared_locks() { table_shared_id="$1" - for part in $($CLICKHOUSE_KEEPER_CLIENT -q "ls /clickhouse/zero_copy/zero_copy_s3/${table_shared_id}") + for part in $($CLICKHOUSE_KEEPER_CLIENT -q "ls '/clickhouse/zero_copy/zero_copy_s3/${table_shared_id}'") do - for blob in $($CLICKHOUSE_KEEPER_CLIENT -q "ls /clickhouse/zero_copy/zero_copy_s3/${table_shared_id}/${part}") + for blob in $($CLICKHOUSE_KEEPER_CLIENT -q "ls '/clickhouse/zero_copy/zero_copy_s3/${table_shared_id}/${part}'") do - for lock in $($CLICKHOUSE_KEEPER_CLIENT -q "ls /clickhouse/zero_copy/zero_copy_s3/${table_shared_id}/${part}/${blob}") + for lock in $($CLICKHOUSE_KEEPER_CLIENT -q "ls '/clickhouse/zero_copy/zero_copy_s3/${table_shared_id}/${part}/${blob}'") do echo "/clickhouse/zero_copy/zero_copy_s3/${table_shared_id}/${part}/${blob}/${lock}" done @@ -48,7 +48,7 @@ function filter_temporary_locks() { while read -r lock do - owner="$($CLICKHOUSE_KEEPER_CLIENT -q "get_stat ${lock}" | grep 'ephemeralOwner' | sed 's/.*= //')" + owner="$($CLICKHOUSE_KEEPER_CLIENT -q "get_stat '${lock}'" | grep 'ephemeralOwner' | sed 's/.*= //')" if [[ "${owner}" -eq "0" ]] then echo "${lock}" @@ -58,9 +58,9 @@ function filter_temporary_locks() function insert_duplicates() { - $CLICKHOUSE_CLIENT -q "insert into r1 values(1);" --send_logs_level="error" & + $CLICKHOUSE_CLIENT -q "insert into r1 values(1);" 
--allow_repeated_settings --send_logs_level="error" & - $CLICKHOUSE_CLIENT -q "insert into r2 values(1);" --send_logs_level="error" + $CLICKHOUSE_CLIENT -q "insert into r2 values(1);" --allow_repeated_settings --send_logs_level="error" wait @@ -111,7 +111,7 @@ export -f insert_duplicates export -f get_shared_locks export -f loop -table_shared_id="$($CLICKHOUSE_KEEPER_CLIENT -q "get /test/02922/${CLICKHOUSE_DATABASE}/table/table_shared_id")" +table_shared_id="$($CLICKHOUSE_KEEPER_CLIENT -q "get '/test/02922/${CLICKHOUSE_DATABASE}/table/table_shared_id'")" exit_code=0 timeout 40 bash -c "loop '${table_shared_id}'" || exit_code="${?}" @@ -128,17 +128,17 @@ function list_keeper_nodes() { table_shared_id=$1 echo "zero_copy:" - $CLICKHOUSE_KEEPER_CLIENT -q "ls /clickhouse/zero_copy/zero_copy_s3" | grep -o "${table_shared_id}" | \ + $CLICKHOUSE_KEEPER_CLIENT -q "ls '/clickhouse/zero_copy/zero_copy_s3'" | grep -o "${table_shared_id}" | \ sed "s/${table_shared_id}//g" || : echo "tables:" - $CLICKHOUSE_KEEPER_CLIENT -q "ls /test/02922/${CLICKHOUSE_DATABASE}" | grep -o "table" || : + $CLICKHOUSE_KEEPER_CLIENT -q "ls '/test/02922/${CLICKHOUSE_DATABASE}'" | grep -o "table" || : } list_keeper_nodes "${table_shared_id}" -$CLICKHOUSE_CLIENT -nm -q "drop table r1;" --send_logs_level="error" & -$CLICKHOUSE_CLIENT -nm -q "drop table r2;" --send_logs_level="error" & +$CLICKHOUSE_CLIENT -nm -q "drop table r1;" --allow_repeated_settings --send_logs_level="error" & +$CLICKHOUSE_CLIENT -nm -q "drop table r2;" --allow_repeated_settings --send_logs_level="error" & wait list_keeper_nodes "${table_shared_id}" diff --git a/tests/queries/0_stateless/02922_server_exit_code.sh b/tests/queries/0_stateless/02922_server_exit_code.sh index 60049902410..ded0dc4763f 100755 --- a/tests/queries/0_stateless/02922_server_exit_code.sh +++ b/tests/queries/0_stateless/02922_server_exit_code.sh @@ -7,6 +7,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # We will check that the server's exit code 
corresponds to the exception code if it was terminated after exception. # In this example, we provide an invalid path to the server's config, ignore its logs and check the exit code. -# The exception code is 400 = CANNOT_STAT, so the exit code will be 400 % 256. +# The exception code is 76 = CANNOT_OPEN_FILE, so the exit code will be 76 % 256. -${CLICKHOUSE_SERVER_BINARY} -- --path /dev/null 2>/dev/null; [[ "$?" == "$((400 % 256))" ]] && echo 'Ok' || echo 'Fail' +${CLICKHOUSE_SERVER_BINARY} -- --path /dev/null 2>/dev/null; [[ "$?" == "$((76 % 256))" ]] && echo 'Ok' || echo 'Fail' diff --git a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference index 3124698d218..f9b72ba9c6a 100644 --- a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference +++ b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference @@ -245,21 +245,21 @@ EXPLAIN SYNTAX (SELECT 2 * count(uint64) - sum(uint64) From test_table); SELECT (2 * count(uint64)) - sum(uint64) FROM test_table SELECT sum(float64 + 2) From test_table; -26.5 +26.875 SELECT sum(2 + float64) From test_table; -26.5 +26.875 SELECT sum(float64 - 2) From test_table; -6.5 +6.875 SELECT sum(2 - float64) From test_table; --6.5 +-6.875 SELECT sum(float64) + 2 * count(float64) From test_table; -26.5 +26.875 SELECT 2 * count(float64) + sum(float64) From test_table; -26.5 +26.875 SELECT sum(float64) - 2 * count(float64) From test_table; -6.5 +6.875 SELECT 2 * count(float64) - sum(float64) From test_table; --6.5 +-6.875 EXPLAIN SYNTAX (SELECT sum(float64 + 2) From test_table); SELECT sum(float64) + (2 * count(float64)) FROM test_table @@ -375,25 +375,25 @@ EXPLAIN SYNTAX (SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - SELECT ((2 * count(uint64)) - sum(uint64)) + ((3 * count(uint64)) - sum(uint64)) FROM test_table SELECT sum(float64 + 2) + sum(float64 + 3) From test_table; -58 +58.75 SELECT 
sum(float64 + 2) - sum(float64 + 3) From test_table; -5 SELECT sum(float64 - 2) + sum(float64 - 3) From test_table; -8 +8.75 SELECT sum(float64 - 2) - sum(float64 - 3) From test_table; 5 SELECT sum(2 - float64) - sum(3 - float64) From test_table; -5 SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table; -58 +58.75 SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table; -5 SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table; -8 +8.75 SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table; 5 SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table; --8 +-8.75 EXPLAIN SYNTAX (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table); SELECT (sum(float64) + (2 * count(float64))) + (sum(float64) + (3 * count(float64))) FROM test_table diff --git a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql index c7b0ff82442..94baee6f1ba 100644 --- a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql +++ b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql @@ -23,11 +23,12 @@ CREATE TABLE test_table decimal32 Decimal32(5), ) ENGINE=MergeTree ORDER BY uint64; -INSERT INTO test_table VALUES (1, 1.1, 1.11); -INSERT INTO test_table VALUES (2, 2.2, 2.22); -INSERT INTO test_table VALUES (3, 3.3, 3.33); -INSERT INTO test_table VALUES (4, 4.4, 4.44); -INSERT INTO test_table VALUES (5, 5.5, 5.55); +-- Use Float64 numbers divisible by 1/16 (or some other small power of two), so that their sum doesn't depend on summation order. 
+INSERT INTO test_table VALUES (1, 1.125, 1.11); +INSERT INTO test_table VALUES (2, 2.250, 2.22); +INSERT INTO test_table VALUES (3, 3.375, 3.33); +INSERT INTO test_table VALUES (4, 4.500, 4.44); +INSERT INTO test_table VALUES (5, 5.625, 5.55); -- { echoOn } SELECT sum(uint64 + 1 AS i) from test_table where i > 0; diff --git a/tests/queries/0_stateless/02941_variant_type_1.sh b/tests/queries/0_stateless/02941_variant_type_1.sh index 22ca909a26e..723de45eaad 100755 --- a/tests/queries/0_stateless/02941_variant_type_1.sh +++ b/tests/queries/0_stateless/02941_variant_type_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1 --index_granularity_bytes=10485760 --index_granularity=8192" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1" function test1_insert() { @@ -115,11 +115,11 @@ run 0 $CH_CLIENT -q "drop table test;" echo "MergeTree compact" -$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000, index_granularity_bytes=10485760, index_granularity=8192;" run 1 $CH_CLIENT -q "drop table test;" echo "MergeTree wide" -$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, 
LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, index_granularity_bytes=10485760, index_granularity=8192;" run 1 $CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh index 91ba0285bd8..f43cd2bb0d6 100755 --- a/tests/queries/0_stateless/02941_variant_type_2.sh +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1 --index_granularity_bytes=10485760 --index_granularity=8192" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1" function test4_insert() { @@ -61,11 +61,11 @@ run 0 $CH_CLIENT -q "drop table test;" echo "MergeTree compact" -$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000, index_granularity_bytes=10485760, index_granularity=8192;" run 1 $CH_CLIENT -q "drop table test;" echo "MergeTree wide" -$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) 
engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, index_granularity_bytes=10485760, index_granularity=8192;" run 1 $CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02941_variant_type_3.sh b/tests/queries/0_stateless/02941_variant_type_3.sh index 8a039a02d6d..f4b2b304f56 100755 --- a/tests/queries/0_stateless/02941_variant_type_3.sh +++ b/tests/queries/0_stateless/02941_variant_type_3.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1 --index_granularity_bytes=10485760 --index_granularity=8192 " +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1" function test5_insert() { @@ -63,11 +63,11 @@ run 0 $CH_CLIENT -q "drop table test;" echo "MergeTree compact" -$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000, index_granularity_bytes=10485760, index_granularity=8192;" run 1 $CH_CLIENT -q "drop table test;" echo "MergeTree wide" -$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, 
min_bytes_for_wide_part=1, index_granularity_bytes=10485760, index_granularity=8192;" run 1 $CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh index e38db8fda54..f9a16847864 100755 --- a/tests/queries/0_stateless/02941_variant_type_4.sh +++ b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -7,7 +7,8 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1 --index_granularity_bytes=10485760 --index_granularity=8192 " + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1" function test6_insert() { @@ -57,11 +58,11 @@ run 0 $CH_CLIENT -q "drop table test;" echo "MergeTree compact" -$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000, index_granularity_bytes=10485760, index_granularity=8192;" run 1 $CH_CLIENT -q "drop table test;" echo "MergeTree wide" -$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, index_granularity_bytes=10485760, 
index_granularity=8192;" run 1 $CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02942_variant_cast.sql b/tests/queries/0_stateless/02942_variant_cast.sql index fc2d1d63657..33587e3e438 100644 --- a/tests/queries/0_stateless/02942_variant_cast.sql +++ b/tests/queries/0_stateless/02942_variant_cast.sql @@ -1,5 +1,4 @@ set allow_experimental_variant_type=1; -set allow_experimental_analyzer=0; -- It's currently doesn't work with analyzer because of the way it works with constants, but it will be refactored and fixed in future select NULL::Variant(String, UInt64); select 42::UInt64::Variant(String, UInt64); diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference index 0e1954cde62..5b7ad7ddce0 100644 --- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference +++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference @@ -1,5 +1,5 @@ -1 Hello ClickHouse -2 Hello World +1 Well, Hello ClickHouse ! +2 Well, Hello World ! 1 Hello ClickHouse 2 Hello World Granules: 6/6 @@ -11,9 +11,9 @@ Granules: 6/6 Granules: 2/6 --- -1 Hello ClickHouse -2 Hello World -6 World Champion +1 Well, Hello ClickHouse ! +2 Well, Hello World ! 
+6 True World Champion 1 Hello ClickHouse 2 Hello World 6 World Champion @@ -26,7 +26,7 @@ Granules: 6/6 Granules: 3/6 --- -5 OLAP Database +5 Its An OLAP Database 5 OLAP Database Granules: 6/6 Granules: 1/6 diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql index 49d39c601ef..42175cbb2c6 100644 --- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql +++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql @@ -21,21 +21,22 @@ ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 1; -INSERT INTO tokenbf_tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Good Weather'), (4, 'Say Hello'), (5, 'OLAP Database'), (6, 'World Champion'); +INSERT INTO tokenbf_tab VALUES (1, 'Well, Hello ClickHouse !'), (2, 'Well, Hello World !'), (3, 'Good Weather !'), (4, 'Say Hello !'), (5, 'Its An OLAP Database'), (6, 'True World Champion'); INSERT INTO ngrambf_tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Good Weather'), (4, 'Say Hello'), (5, 'OLAP Database'), (6, 'World Champion'); -SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id; +SELECT * FROM tokenbf_tab WHERE match(str, ' Hello (ClickHouse|World) ') ORDER BY id; SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id; -- Read 2/6 granules -- Required string: 'Hello ' -- Alternatives: 'Hello ClickHouse', 'Hello World' +-- Surrounded by spaces for tokenbf SELECT * FROM ( EXPLAIN PLAN indexes=1 - SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id + SELECT * FROM tokenbf_tab WHERE match(str, ' Hello (ClickHouse|World) ') ORDER BY id ) WHERE explain LIKE '%Granules: %' @@ -46,7 +47,7 @@ SELECT * FROM ( EXPLAIN PLAN indexes=1 - SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY 
id + SELECT * FROM tokenbf_tab WHERE match(str, ' Hello (ClickHouse|World) ') ORDER BY id ) WHERE explain LIKE '%Granules: %' @@ -78,18 +79,19 @@ SETTINGS SELECT '---'; -SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id; +SELECT * FROM tokenbf_tab WHERE match(str, '.* (ClickHouse|World) ') ORDER BY id; SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id; -- Read 3/6 granules -- Required string: - -- Alternatives: 'ClickHouse', 'World' +-- Surrounded by spaces for tokenbf SELECT * FROM ( EXPLAIN PLAN indexes = 1 - SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id + SELECT * FROM tokenbf_tab WHERE match(str, '.* (ClickHouse|World) ') ORDER BY id ) WHERE explain LIKE '%Granules: %' @@ -100,7 +102,7 @@ SELECT * FROM ( EXPLAIN PLAN indexes = 1 - SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id + SELECT * FROM tokenbf_tab WHERE match(str, '.* (ClickHouse|World) ') ORDER BY id ) WHERE explain LIKE '%Granules: %' @@ -131,18 +133,19 @@ SETTINGS SELECT '---'; -SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP.*') ORDER BY id; +SELECT * FROM tokenbf_tab WHERE match(str, ' OLAP .*') ORDER BY id; SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP.*') ORDER BY id; -- Read 1/6 granules -- Required string: 'OLAP' -- Alternatives: - +-- Surrounded by spaces for tokenbf SELECT * FROM ( EXPLAIN PLAN indexes = 1 - SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id + SELECT * FROM tokenbf_tab WHERE match(str, ' OLAP (.*?)*') ORDER BY id ) WHERE explain LIKE '%Granules: %' @@ -152,7 +155,7 @@ SELECT * FROM ( EXPLAIN PLAN indexes = 1 - SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id + SELECT * FROM tokenbf_tab WHERE match(str, ' OLAP (.*?)*') ORDER BY id ) WHERE explain LIKE '%Granules: %' diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference 
b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference index 4a6bc8498e1..298cc908178 100644 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference @@ -1,20 +1,20 @@ -100 10 10 10 0 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 16 +100 10 10 10 0 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 0 10 98 set max_size from 100 to 10 -10 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 16 +10 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 1 8 set max_size from 10 to 100 -100 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 16 +100 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 10 98 set max_elements from 10 to 2 -100 2 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 16 +100 2 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 2 18 set max_elements from 2 to 10 -100 10 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 5 5000 0 16 +100 10 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 10 98 diff --git a/tests/queries/0_stateless/02944_variant_as_common_type.sql b/tests/queries/0_stateless/02944_variant_as_common_type.sql index e985cf365dd..49ea5f2769c 100644 --- a/tests/queries/0_stateless/02944_variant_as_common_type.sql +++ b/tests/queries/0_stateless/02944_variant_as_common_type.sql @@ -1,5 +1,3 @@ -set allow_experimental_analyzer=0; -- The result type for if function with constant is different with analyzer. It wil be fixed after refactoring around constants in analyzer. 
- set allow_experimental_variant_type=1; set use_variant_as_common_type=1; diff --git a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh index 45e65b18e07..f7111d0afe2 100755 --- a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh +++ b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-ordinary-database, use-rocksdb +# Tags: no-ordinary-database, use-rocksdb, no-random-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Normal importing, as we only insert 1000 rows, so it should be in memtable ${CLICKHOUSE_CLIENT} --query "CREATE TABLE IF NOT EXISTS rocksdb_worm (key UInt64, value UInt64) ENGINE = EmbeddedRocksDB() PRIMARY KEY key SETTINGS optimize_for_bulk_insert = 0;" -${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers(1000);" +${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers(1000) SETTINGS optimize_trivial_insert_select = 1;" ${CLICKHOUSE_CLIENT} --query "SELECT sum(value) FROM system.rocksdb WHERE database = currentDatabase() AND table = 'rocksdb_worm' AND name = 'no.file.opens';" # should be 0 because all data is still in memtable ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm;" @@ -16,33 +16,32 @@ ${CLICKHOUSE_CLIENT} --query "ALTER TABLE rocksdb_worm MODIFY SETTING optimize_f # Testing that key serialization is identical w. 
and w/o bulk sink ${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE rocksdb_worm;" -${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+2 FROM numbers(1000);" # should override previous keys +${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+2 FROM numbers(1000) SETTINGS optimize_trivial_insert_select = 1;" # should override previous keys ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm WHERE value = key + 2;" # With bulk insertion, there is no memtable, so a small insert should create a new file ${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE rocksdb_worm;" -${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers(1000);" +${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers(1000) SETTINGS optimize_trivial_insert_select = 1;" ${CLICKHOUSE_CLIENT} --query "SELECT sum(value) FROM system.rocksdb WHERE database = currentDatabase() AND table = 'rocksdb_worm' AND name = 'no.file.opens';" # should be 1 ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm;" # Testing insert with multiple sinks and fixed block size ${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE rocksdb_worm;" # Must set both max_threads and max_insert_threads to 2 to make sure there is only two sinks -${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers_mt(1000000) SETTINGS max_threads = 2, max_insert_threads = 2, max_block_size = 10000, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, insert_deduplication_token = '';" +${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers_mt(1000000) SETTINGS max_threads = 2, max_insert_threads = 2, max_block_size = 10000, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, insert_deduplication_token = '', optimize_trivial_insert_select = 1;" ${CLICKHOUSE_CLIENT} --query "SELECT sum(value) FROM system.rocksdb WHERE database = 
currentDatabase() AND table = 'rocksdb_worm' AND name = 'no.file.opens';" # should be 2 because default bulk sink size is ~1M rows / SST file ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm;" # Testing insert with duplicated keys ${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE rocksdb_worm;" -${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number % 1000, number+1 FROM numbers_mt(1000000) SETTINGS max_block_size = 100000, max_insert_threads = 1;" +${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number % 1000, number+1 FROM numbers_mt(1000000) SETTINGS max_block_size = 100000, max_insert_threads = 1, optimize_trivial_insert_select = 1;" ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm;" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM rocksdb_worm WHERE key = 0;" # should be the latest value - 999001 # Testing insert with multiple threads ${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE rocksdb_worm;" -${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers_mt(1000000)" & -${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers_mt(1000000)" & +${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers_mt(1000000) SETTINGS optimize_trivial_insert_select = 1" & +${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers_mt(1000000) SETTINGS optimize_trivial_insert_select = 1" & wait ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm;" - diff --git a/tests/queries/0_stateless/02956_rocksdb_with_ttl.reference b/tests/queries/0_stateless/02956_rocksdb_with_ttl.reference new file mode 100644 index 00000000000..508f73c8d78 --- /dev/null +++ b/tests/queries/0_stateless/02956_rocksdb_with_ttl.reference @@ -0,0 +1,2 @@ +0 foo +0 diff --git a/tests/queries/0_stateless/02956_rocksdb_with_ttl.sql b/tests/queries/0_stateless/02956_rocksdb_with_ttl.sql new file mode 100644 index 
00000000000..01efe19cf45 --- /dev/null +++ b/tests/queries/0_stateless/02956_rocksdb_with_ttl.sql @@ -0,0 +1,13 @@ +-- Tags: no-ordinary-database, use-rocksdb + +-- TTL = 2s +CREATE TABLE dict_with_ttl (key UInt64, value String) ENGINE = EmbeddedRocksDB(2) PRIMARY KEY (key); +INSERT INTO dict_with_ttl VALUES (0, 'foo'); +-- Data inserted correctly +SELECT * FROM dict_with_ttl; +-- If possible, we should test that even we execute OPTIMIZE TABLE, the data is still there if TTL is not expired yet +-- Nevertheless, query time is unpredictable with different builds, so we can't test it. So we only test that after 3s +-- we execute OPTIMIZE and the data should be gone. +SELECT sleep(3); +OPTIMIZE TABLE dict_with_ttl; +SELECT * FROM dict_with_ttl; diff --git a/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql index 8f67cd7e030..7ebef866360 100644 --- a/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql +++ b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql @@ -22,7 +22,7 @@ create table test (a Int32) engine = MergeTree() order by tuple() settings disk=disk(name='test2', type = object_storage, object_storage_type = s3, - metadata_storage_type = local, + metadata_type = local, endpoint = 'http://localhost:11111/test/common/', access_key_id = clickhouse, secret_access_key = clickhouse); @@ -32,7 +32,7 @@ create table test (a Int32) engine = MergeTree() order by tuple() settings disk=disk(name='test3', type = object_storage, object_storage_type = s3, - metadata_storage_type = local, + metadata_type = local, metadata_keep_free_space_bytes = 1024, endpoint = 'http://localhost:11111/test/common/', access_key_id = clickhouse, @@ -43,7 +43,7 @@ create table test (a Int32) engine = MergeTree() order by tuple() settings disk=disk(name='test4', type = object_storage, object_storage_type = s3, - metadata_storage_type = local, + metadata_type = local, 
metadata_keep_free_space_bytes = 0, endpoint = 'http://localhost:11111/test/common/', access_key_id = clickhouse, diff --git a/tests/queries/0_stateless/02969_auto_format_detection.reference b/tests/queries/0_stateless/02969_auto_format_detection.reference index 865db11defc..4b86be04996 100644 --- a/tests/queries/0_stateless/02969_auto_format_detection.reference +++ b/tests/queries/0_stateless/02969_auto_format_detection.reference @@ -82,7 +82,8 @@ CSV c1 Nullable(UInt64) c2 Nullable(String) c3 Array(Nullable(UInt64)) -c4 Tuple(Nullable(UInt64), Nullable(String)) +c4 Nullable(UInt64) +c5 Nullable(String) a Nullable(String) b Nullable(String) c Array(Nullable(String)) diff --git a/tests/queries/0_stateless/02977_csv_format_support_tuple.sql b/tests/queries/0_stateless/02977_csv_format_support_tuple.sql index d00cc00e097..f30e217ca0f 100644 --- a/tests/queries/0_stateless/02977_csv_format_support_tuple.sql +++ b/tests/queries/0_stateless/02977_csv_format_support_tuple.sql @@ -1,5 +1,9 @@ -- Tags: no-parallel +SET output_format_csv_serialize_tuple_into_separate_columns = false; +SET input_format_csv_deserialize_separate_columns_into_tuple = false; +SET input_format_csv_try_infer_strings_from_quoted_tuples = false; + insert into function file('02977_1.csv') select '20240305', 1, ['s', 'd'], map('a', 2), tuple('222', 33, map('abc', 5)) SETTINGS engine_file_truncate_on_insert=1; desc file('02977_1.csv'); select * from file('02977_1.csv') settings max_threads=1; diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference index d00491fd7e5..72749c905a3 100644 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference @@ -1 +1 @@ -1 +1 1 1 diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh 
b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh index 1c72cf2b8c1..263a4535c0e 100755 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh @@ -11,4 +11,4 @@ $CLICKHOUSE_CLIENT --query_id $query_id --log_query_threads 1 --query="select nu $CLICKHOUSE_CLIENT -q "system flush logs;" -$CLICKHOUSE_CLIENT -q "select count() > 1 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase() and thread_name = 'AggregDestruct';" +$CLICKHOUSE_CLIENT -q "select count() > 0, (countIf(thread_name = 'AggregDestruct') as aggs) > 0, aggs > 1 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase();" diff --git a/tests/queries/0_stateless/02993_lazy_index_loading.sql b/tests/queries/0_stateless/02993_lazy_index_loading.sql index 7de4af9ef0e..ffb4b7547bf 100644 --- a/tests/queries/0_stateless/02993_lazy_index_loading.sql +++ b/tests/queries/0_stateless/02993_lazy_index_loading.sql @@ -1,6 +1,7 @@ DROP TABLE IF EXISTS test; CREATE TABLE test (s String) ENGINE = MergeTree ORDER BY s SETTINGS index_granularity = 1; +SET optimize_trivial_insert_select = 1; INSERT INTO test SELECT randomString(1000) FROM numbers(100000); SELECT round(primary_key_bytes_in_memory, -7), round(primary_key_bytes_in_memory_allocated, -7) FROM system.parts WHERE database = currentDatabase() AND table = 'test'; diff --git a/tests/queries/0_stateless/02995_baseline_23_12_1.tsv b/tests/queries/0_stateless/02995_baseline_23_12_1.tsv index 4c0c9125b46..a391473e7c9 100644 --- a/tests/queries/0_stateless/02995_baseline_23_12_1.tsv +++ b/tests/queries/0_stateless/02995_baseline_23_12_1.tsv @@ -41,7 +41,7 @@ allow_experimental_query_deduplication 0 allow_experimental_refreshable_materialized_view 0 allow_experimental_s3queue 1 allow_experimental_shared_merge_tree 0 -allow_experimental_statistic 0 +allow_experimental_statistics 0 
allow_experimental_undrop_table_query 1 allow_experimental_usearch_index 0 allow_experimental_window_functions 1 @@ -58,7 +58,7 @@ allow_prefetched_read_pool_for_remote_filesystem 1 allow_push_predicate_when_subquery_contains_with 1 allow_settings_after_format_in_insert 0 allow_simdjson 1 -allow_statistic_optimize 0 +allow_statistics_optimize 0 allow_suspicious_codecs 0 allow_suspicious_fixed_string_types 0 allow_suspicious_indices 0 diff --git a/tests/queries/0_stateless/02998_pretty_format_print_readable_number_on_single_value.sql b/tests/queries/0_stateless/02998_pretty_format_print_readable_number_on_single_value.sql index 5dc69488cea..46d6bb657c9 100644 --- a/tests/queries/0_stateless/02998_pretty_format_print_readable_number_on_single_value.sql +++ b/tests/queries/0_stateless/02998_pretty_format_print_readable_number_on_single_value.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_display_footer_column_names=0; SELECT 1_000_000 as a FORMAT Pretty; SELECT 1_000_000 as a FORMAT PrettyNoEscapes; SELECT 1_000_000 as a FORMAT PrettyMonoBlock; diff --git a/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql index d57db9151b9..c1e6eba6b6f 100644 --- a/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql +++ b/tests/queries/0_stateless/03000_virtual_columns_in_prewhere.sql @@ -1,3 +1,5 @@ +SET optimize_trivial_insert_select = 1; + drop table if exists x; create table x (i int, j int, k int) engine MergeTree order by tuple() settings index_granularity=8192, index_granularity_bytes = '10Mi', min_bytes_for_wide_part=0, min_rows_for_wide_part=0, ratio_of_defaults_for_sparse_serialization=1; diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql index 34ba034f798..d8b5ebb3148 100644 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql +++ 
b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql @@ -4,9 +4,9 @@ drop table if exists rmt_master; drop table if exists rmt_slave; -create table rmt_master (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'master') order by tuple() settings always_fetch_merged_part=0; +create table rmt_master (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'master') order by tuple() settings always_fetch_merged_part=0, old_parts_lifetime=600; -- prefer_fetch_merged_part_*_threshold=0, consider this table as a "slave" -create table rmt_slave (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'slave') order by tuple() settings prefer_fetch_merged_part_time_threshold=0, prefer_fetch_merged_part_size_threshold=0; +create table rmt_slave (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'slave') order by tuple() settings prefer_fetch_merged_part_time_threshold=0, prefer_fetch_merged_part_size_threshold=0, old_parts_lifetime=600; insert into rmt_master values (1); diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_2.reference b/tests/queries/0_stateless/03006_join_on_inequal_expression_2.reference new file mode 100644 index 00000000000..bab1fbd050f --- /dev/null +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_2.reference @@ -0,0 +1,290 @@ +-- { echoOn } +-- inequality operation +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 0 0 +3 20 gamma 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 0 0 +3 20 gamma 0 0 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND 
(t1.attr != t2.attr) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +0 0 2 10 beta +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +0 0 2 10 beta +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +0 0 2 10 beta +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 0 0 +3 20 gamma 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +0 0 2 10 beta +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 0 0 +3 20 gamma 0 0 +-- +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND t1.a > t2.key AND t1.key + t2.a > 1 ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +3 20 gamma 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND t1.a > t2.key AND t1.key + t2.a > 1 ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +3 20 gamma 0 0 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND t1.a > t2.key AND t1.key + t2.a > 1 ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND t1.a > t2.key AND t1.key + t2.a > 1 ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND t1.a > t2.key AND t1.key + t2.a > 1 ORDER BY ALL; +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND t1.a > t2.key AND t1.key + t2.a > 1 ORDER BY ALL; +0 0 4 25 delta 
+1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND t1.a > t2.key AND t1.key + t2.a > 1 ORDER BY ALL; +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +3 20 gamma 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND t1.a > t2.key AND t1.key + t2.a > 1 ORDER BY ALL; +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +3 20 gamma 0 0 +-- +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.key < t2.a OR t1.a % 2 = 0) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +3 20 gamma 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.key < t2.a OR t1.a % 2 = 0) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +3 20 gamma 0 0 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.key < t2.a OR t1.a % 2 = 0) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.key < t2.a OR t1.a % 2 = 0) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.key < t2.a OR t1.a % 2 = 0) ORDER BY ALL; +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.key < t2.a OR t1.a % 2 = 0) ORDER BY ALL; +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.key < t2.a OR t1.a % 2 = 0) ORDER BY ALL; +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +3 20 gamma 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.key < t2.a OR t1.a % 2 = 0) ORDER 
BY ALL; +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +3 20 gamma 0 0 +-- BETWEEN +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t2.a BETWEEN 8 AND t1.a) ORDER BY ALL; +1 10 alpha 0 0 +2 15 beta 2 10 beta +3 20 gamma 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t2.a BETWEEN 8 AND t1.a) ORDER BY ALL; +1 10 alpha 0 0 +2 15 beta 2 10 beta +3 20 gamma 0 0 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t2.a BETWEEN 8 AND t1.a) ORDER BY ALL; +2 15 beta 2 10 beta +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t2.a BETWEEN 8 AND t1.a) ORDER BY ALL; +2 15 beta 2 10 beta +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t2.a BETWEEN 8 AND t1.a) ORDER BY ALL; +0 0 1 5 ALPHA +0 0 4 25 delta +2 15 beta 2 10 beta +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t2.a BETWEEN 8 AND t1.a) ORDER BY ALL; +0 0 1 5 ALPHA +0 0 4 25 delta +2 15 beta 2 10 beta +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t2.a BETWEEN 8 AND t1.a) ORDER BY ALL; +0 0 1 5 ALPHA +0 0 4 25 delta +1 10 alpha 0 0 +2 15 beta 2 10 beta +3 20 gamma 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t2.a BETWEEN 8 AND t1.a) ORDER BY ALL; +0 0 1 5 ALPHA +0 0 4 25 delta +1 10 alpha 0 0 +2 15 beta 2 10 beta +3 20 gamma 0 0 +-- +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.a IN (SELECT a FROM t2 WHERE a = 10)) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 0 0 +3 20 gamma 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.a IN (SELECT a FROM t2 WHERE a = 10)) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 0 0 +3 20 gamma 0 
0 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.a IN (SELECT a FROM t2 WHERE a = 10)) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.a IN (SELECT a FROM t2 WHERE a = 10)) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.a IN (SELECT a FROM t2 WHERE a = 10)) ORDER BY ALL; +0 0 2 10 beta +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.a IN (SELECT a FROM t2 WHERE a = 10)) ORDER BY ALL; +0 0 2 10 beta +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.a IN (SELECT a FROM t2 WHERE a = 10)) ORDER BY ALL; +0 0 2 10 beta +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 0 0 +3 20 gamma 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.a IN (SELECT a FROM t2 WHERE a = 10)) ORDER BY ALL; +0 0 2 10 beta +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 0 0 +3 20 gamma 0 0 +-- Stupid condition +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +3 20 gamma 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +3 20 gamma 0 0 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 
INNER JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +3 20 gamma 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 2 10 beta +3 20 gamma 0 0 +-- Window functions with stupid condition +SET join_algorithm='hash'; +SELECT t1.*, t2.*, AVG(t1.a) OVER () AS avg_b, SUM(t2.key) OVER () AS sum_c FROM t1 LEFT JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +1 10 alpha 1 5 ALPHA 15 3 +2 15 beta 2 10 beta 15 3 +3 20 gamma 0 0 15 3 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.*, AVG(t1.a) OVER () AS avg_b, SUM(t2.key) OVER () AS sum_c FROM t1 LEFT JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +1 10 alpha 1 5 ALPHA 15 3 +2 15 beta 2 10 beta 15 3 +3 20 gamma 0 0 15 3 +SET join_algorithm='hash'; +SELECT t1.*, t2.*, AVG(t1.a) OVER () AS avg_b, SUM(t2.key) OVER () AS sum_c FROM t1 INNER JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / 
length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +1 10 alpha 1 5 ALPHA 12.5 3 +2 15 beta 2 10 beta 12.5 3 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.*, AVG(t1.a) OVER () AS avg_b, SUM(t2.key) OVER () AS sum_c FROM t1 INNER JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +1 10 alpha 1 5 ALPHA 12.5 3 +2 15 beta 2 10 beta 12.5 3 +SET join_algorithm='hash'; +SELECT t1.*, t2.*, AVG(t1.a) OVER () AS avg_b, SUM(t2.key) OVER () AS sum_c FROM t1 RIGHT JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +0 0 4 25 delta 8.333333333333334 7 +1 10 alpha 1 5 ALPHA 8.333333333333334 7 +2 15 beta 2 10 beta 8.333333333333334 7 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.*, AVG(t1.a) OVER () AS avg_b, SUM(t2.key) OVER () AS sum_c FROM t1 RIGHT JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +0 0 4 25 delta 8.333333333333334 7 +1 10 alpha 1 5 ALPHA 8.333333333333334 7 +2 15 beta 2 10 beta 8.333333333333334 7 +SET join_algorithm='hash'; +SELECT t1.*, t2.*, AVG(t1.a) OVER () AS avg_b, SUM(t2.key) OVER () AS sum_c FROM t1 FULL JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +0 0 4 25 delta 11.25 7 +1 10 alpha 1 5 ALPHA 11.25 7 +2 15 beta 2 10 beta 11.25 7 +3 20 gamma 0 0 11.25 7 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.*, AVG(t1.a) OVER () AS avg_b, SUM(t2.key) OVER () AS sum_c FROM t1 FULL JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +0 0 4 25 delta 11.25 7 +1 10 alpha 1 5 ALPHA 11.25 7 +2 15 beta 2 10 beta 11.25 7 +3 20 gamma 0 0 11.25 7 +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_2.sql.j2 
b/tests/queries/0_stateless/03006_join_on_inequal_expression_2.sql.j2 new file mode 100644 index 00000000000..f15fced161c --- /dev/null +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_2.sql.j2 @@ -0,0 +1,82 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + + +CREATE TABLE t1 ( + key UInt32, + a UInt32, + attr String +) ENGINE = MergeTree ORDER BY key; + +CREATE TABLE t2 ( + key UInt32, + a UInt32, + attr String +) ENGINE = MergeTree ORDER BY key; + +INSERT INTO t1 (key, a, attr) VALUES (1, 10, 'alpha'), (2, 15, 'beta'), (3, 20, 'gamma'); +INSERT INTO t2 (key, a, attr) VALUES (1, 5, 'ALPHA'), (2, 10, 'beta'), (4, 25, 'delta'); + + +SET allow_experimental_analyzer=1; +SET allow_experimental_join_condition=1; +SET join_use_nulls=0; +-- { echoOn } +-- inequality operation +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +{% endfor -%} +{% endfor -%} + +-- +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND t1.a > t2.key AND t1.key + t2.a > 1 ORDER BY ALL; +{% endfor -%} +{% endfor -%} + +-- +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND (t1.key < t2.a OR t1.a % 2 = 0) ORDER BY ALL; +{% endfor -%} +{% endfor -%} + +-- BETWEEN +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND (t2.a BETWEEN 8 AND t1.a) ORDER BY ALL; +{% endfor -%} +{% endfor 
-%} + +-- +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND (t1.a IN (SELECT a FROM t2 WHERE a = 10)) ORDER BY ALL; +{% endfor -%} +{% endfor -%} + +-- Stupid condition +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +{% endfor -%} +{% endfor -%} + +-- Window functions with stupid condition +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +SELECT t1.*, t2.*, AVG(t1.a) OVER () AS avg_b, SUM(t2.key) OVER () AS sum_c FROM t1 {{ join_type }} JOIN t2 ON t1.key == t2.key AND (t1.a * length(t2.attr) / length(t1.attr) <> t2.a + t1.key - t2.key) ORDER BY ALL; +{% endfor -%} +{% endfor -%} + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_3.reference b/tests/queries/0_stateless/03006_join_on_inequal_expression_3.reference new file mode 100644 index 00000000000..1ec6f911897 --- /dev/null +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_3.reference @@ -0,0 +1,90 @@ +-- { echoOn } + +-- Support for query lower +SET join_algorithm='hash'; +SELECT * FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 0 0 +3 20 gamma 0 0 +SET join_algorithm='grace_hash'; +SELECT * FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +2 15 beta 0 0 +3 20 gamma 0 0 +SET join_algorithm='hash'; +SELECT * FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +1 10 
alpha 1 5 ALPHA +SET join_algorithm='grace_hash'; +SELECT * FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +1 10 alpha 1 5 ALPHA +SET join_algorithm='hash'; +SELECT * FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +0 0 2 10 beta +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +SET join_algorithm='grace_hash'; +SELECT * FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +0 0 2 10 beta +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +SET join_algorithm='hash'; +SELECT * FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +0 0 2 10 beta +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 0 0 +3 20 gamma 0 0 +SET join_algorithm='grace_hash'; +SELECT * FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +0 0 2 10 beta +0 0 4 25 delta +1 10 alpha 1 5 ALPHA +2 15 beta 0 0 +3 20 gamma 0 0 +-- Subquery JOIN +SET join_algorithm='hash'; +SELECT * FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) LEFT JOIN (SELECT * FROM VALUES('key UInt64, a UInt64', (0, 10), (1, 100), (2, 1000))) t3 ON t1.key=t3.key AND t2.key=t3.key AND t3.a!=t1.a AND t3.a!=t2.a ORDER BY ALL; +1 10 alpha 1 5 ALPHA 1 100 +2 15 beta 0 0 0 0 +3 20 gamma 0 0 0 0 +SET join_algorithm='grace_hash'; +SELECT * FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) LEFT JOIN (SELECT * FROM VALUES('key UInt64, a UInt64', (0, 10), (1, 100), (2, 1000))) t3 ON t1.key=t3.key AND t2.key=t3.key AND t3.a!=t1.a AND t3.a!=t2.a ORDER BY ALL; +1 10 alpha 1 5 ALPHA 1 100 +2 15 beta 0 0 0 0 +3 20 gamma 0 0 0 0 +SET join_algorithm='hash'; +SELECT * FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) INNER JOIN (SELECT * FROM VALUES('key UInt64, a UInt64', (0, 10), (1, 100), (2, 1000))) t3 ON t1.key=t3.key AND t2.key=t3.key AND t3.a!=t1.a AND t3.a!=t2.a ORDER BY ALL; +1 10 alpha 1 5 ALPHA 1 100 +SET join_algorithm='grace_hash'; +SELECT * FROM t1 INNER JOIN t2 ON t1.key = 
t2.key AND (t1.attr != t2.attr) INNER JOIN (SELECT * FROM VALUES('key UInt64, a UInt64', (0, 10), (1, 100), (2, 1000))) t3 ON t1.key=t3.key AND t2.key=t3.key AND t3.a!=t1.a AND t3.a!=t2.a ORDER BY ALL; +1 10 alpha 1 5 ALPHA 1 100 +SET join_algorithm='hash'; +SELECT * FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) RIGHT JOIN (SELECT * FROM VALUES('key UInt64, a UInt64', (0, 10), (1, 100), (2, 1000))) t3 ON t1.key=t3.key AND t2.key=t3.key AND t3.a!=t1.a AND t3.a!=t2.a ORDER BY ALL; +0 0 0 0 0 10 +0 0 0 0 2 1000 +1 10 alpha 1 5 ALPHA 1 100 +SET join_algorithm='grace_hash'; +SELECT * FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) RIGHT JOIN (SELECT * FROM VALUES('key UInt64, a UInt64', (0, 10), (1, 100), (2, 1000))) t3 ON t1.key=t3.key AND t2.key=t3.key AND t3.a!=t1.a AND t3.a!=t2.a ORDER BY ALL; +0 0 0 0 0 10 +0 0 0 0 2 1000 +1 10 alpha 1 5 ALPHA 1 100 +SET join_algorithm='hash'; +SELECT * FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) FULL JOIN (SELECT * FROM VALUES('key UInt64, a UInt64', (0, 10), (1, 100), (2, 1000))) t3 ON t1.key=t3.key AND t2.key=t3.key AND t3.a!=t1.a AND t3.a!=t2.a ORDER BY ALL; +0 0 0 0 0 10 +0 0 0 0 2 1000 +0 0 2 10 beta 0 0 +0 0 4 25 delta 0 0 +1 10 alpha 1 5 ALPHA 1 100 +2 15 beta 0 0 0 0 +3 20 gamma 0 0 0 0 +SET join_algorithm='grace_hash'; +SELECT * FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) FULL JOIN (SELECT * FROM VALUES('key UInt64, a UInt64', (0, 10), (1, 100), (2, 1000))) t3 ON t1.key=t3.key AND t2.key=t3.key AND t3.a!=t1.a AND t3.a!=t2.a ORDER BY ALL; +0 0 0 0 0 10 +0 0 0 0 2 1000 +0 0 2 10 beta 0 0 +0 0 4 25 delta 0 0 +1 10 alpha 1 5 ALPHA 1 100 +2 15 beta 0 0 0 0 +3 20 gamma 0 0 0 0 +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_3.sql.j2 b/tests/queries/0_stateless/03006_join_on_inequal_expression_3.sql.j2 new file mode 100644 index 00000000000..a97153ce3aa --- /dev/null +++ 
b/tests/queries/0_stateless/03006_join_on_inequal_expression_3.sql.j2 @@ -0,0 +1,44 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + + +CREATE TABLE t1 ( + key UInt32, + a UInt32, + attr String +) ENGINE = MergeTree ORDER BY key; + +CREATE TABLE t2 ( + key UInt32, + a UInt32, + attr String +) ENGINE = MergeTree ORDER BY key; + +INSERT INTO t1 (key, a, attr) VALUES (1, 10, 'alpha'), (2, 15, 'beta'), (3, 20, 'gamma'); +INSERT INTO t2 (key, a, attr) VALUES (1, 5, 'ALPHA'), (2, 10, 'beta'), (4, 25, 'delta'); + + +SET allow_experimental_analyzer=1; +SET allow_experimental_join_condition=1; +SET join_use_nulls=0; +-- { echoOn } + +-- Support for query lower +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +SELECT * FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) ORDER BY ALL; +{% endfor -%} +{% endfor -%} + + +-- Subquery JOIN +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +SELECT * FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND (t1.attr != t2.attr) {{ join_type }} JOIN (SELECT * FROM VALUES('key UInt64, a UInt64', (0, 10), (1, 100), (2, 1000))) t3 ON t1.key=t3.key AND t2.key=t3.key AND t3.a!=t1.a AND t3.a!=t2.a ORDER BY ALL; +{% endfor -%} +{% endfor -%} + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_4.reference b/tests/queries/0_stateless/03006_join_on_inequal_expression_4.reference new file mode 100644 index 00000000000..0c747d04c58 --- /dev/null +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_4.reference @@ -0,0 +1,163 @@ +-- { echoOn } + +-- These queries work +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND ((t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +1 10 0 0 +2 15 2 10 +3 20 0 
0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND ((t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +1 10 0 0 +2 15 2 10 +3 20 0 0 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND ((t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +2 15 2 10 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND ((t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +2 15 2 10 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND ((t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 4 25 +2 15 2 10 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND ((t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 4 25 +2 15 2 10 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND ((t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 4 25 +1 10 0 0 +2 15 2 10 +3 20 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND ((t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 4 25 +1 10 0 0 +2 15 2 10 +3 20 0 0 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.a=2 AND (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +1 10 0 0 +2 15 0 0 +3 20 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.a=2 AND (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +1 10 0 0 +2 15 0 0 +3 20 0 0 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.a=2 AND (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.a=2 AND (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; 
+SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.a=2 AND (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 2 10 +0 0 4 25 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.a=2 AND (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 2 10 +0 0 4 25 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.a=2 AND (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 2 10 +0 0 4 25 +1 10 0 0 +2 15 0 0 +3 20 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.a=2 AND (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 2 10 +0 0 4 25 +1 10 0 0 +2 15 0 0 +3 20 0 0 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a = (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +1 10 0 0 +2 15 2 10 +3 20 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a = (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +1 10 0 0 +2 15 2 10 +3 20 0 0 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a = (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +2 15 2 10 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a = (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +2 15 2 10 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a = (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 4 25 +2 15 2 10 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a = (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 4 25 +2 15 2 10 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 
FULL JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a = (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 4 25 +1 10 0 0 +2 15 2 10 +3 20 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a = (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 4 25 +1 10 0 0 +2 15 2 10 +3 20 0 0 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +1 10 0 0 +2 15 2 10 +3 20 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +1 10 0 0 +2 15 2 10 +3 20 0 0 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +2 15 2 10 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +2 15 2 10 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 4 25 +2 15 2 10 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 4 25 +2 15 2 10 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 4 25 +1 10 0 0 +2 15 2 10 +3 20 0 0 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +0 0 1 5 +0 0 4 25 +1 10 0 0 +2 15 2 10 +3 20 0 0 diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_4.sql.j2 
b/tests/queries/0_stateless/03006_join_on_inequal_expression_4.sql.j2 new file mode 100644 index 00000000000..3235019821b --- /dev/null +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_4.sql.j2 @@ -0,0 +1,51 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + + +CREATE TABLE t1 ( + key UInt32, + a UInt32 +) ENGINE = MergeTree ORDER BY key; + +CREATE TABLE t2 ( + key UInt32, + a UInt32 +) ENGINE = MergeTree ORDER BY key; + +INSERT INTO t1 (key, a) VALUES (1, 10), (2, 15), (3, 20); +INSERT INTO t2 (key, a) VALUES (1, 5), (2, 10), (4, 25); + +SET allow_experimental_analyzer=1; +SET allow_experimental_join_condition=1; +SET join_algorithm='hash'; +-- { echoOn } + +-- These queries work +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND ((t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +{% endfor -%} +{% endfor -%} + +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND (t1.a=2 AND (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +{% endfor -%} +{% endfor -%} + +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a = (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +{% endfor -%} +{% endfor -%} + + +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 10))) ORDER BY ALL; +{% endfor -%} +{% endfor -%} diff --git 
a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference index 806596f8a63..46f24f73356 100644 --- a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference @@ -38,6 +38,17 @@ key4 f 2 3 4 0 0 \N SELECT t1.*, t2.* from t1 LEFT JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); 1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 3))) ORDER BY ALL; +key1 a 1 1 2 key1 C 3 4 5 +key1 b 2 3 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 C 3 4 5 +key1 e 5 5 5 key1 C 3 4 5 +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 key4 F 1 1 1 SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); key1 a 1 1 2 key1 A 1 2 1 key1 a 1 1 2 key1 B 2 1 2 @@ -67,6 +78,16 @@ key1 c 3 2 1 key1 D 4 1 6 SELECT t1.*, t2.* from t1 INNER JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 INNER JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); 1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 3))) ORDER BY ALL; +key1 a 1 1 2 key1 C 3 4 5 +key1 b 2 3 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 C 3 4 5 +key1 e 5 5 
5 key1 C 3 4 5 +key4 f 2 3 4 key4 F 1 1 1 SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); 0 0 \N key3 a3 1 1 1 key1 a 1 1 2 key1 A 1 2 1 @@ -102,6 +123,17 @@ key1 c 3 2 1 key1 D 4 1 6 SELECT t1.*, t2.* from t1 RIGHT JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); 1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 3))) ORDER BY ALL; + 0 0 \N key3 a3 1 1 1 +key1 a 1 1 2 key1 C 3 4 5 +key1 b 2 3 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 C 3 4 5 +key1 e 5 5 5 key1 C 3 4 5 +key4 f 2 3 4 key4 F 1 1 1 SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); 0 0 \N key3 a3 1 1 1 key1 a 1 1 2 key1 A 1 2 1 @@ -146,6 +178,18 @@ key4 f 2 3 4 0 0 \N SELECT t1.*, t2.* from t1 FULL JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 FULL JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); 1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 3))) ORDER BY ALL; + 0 0 \N key3 a3 1 1 1 +key1 a 1 1 2 key1 C 3 4 5 +key1 b 2 3 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 C 3 4 5 +key1 e 5 5 5 key1 C 3 4 5 +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 key4 F 1 1 1 SET 
join_algorithm='grace_hash'; SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); key1 a 1 1 2 key1 A 1 2 1 @@ -185,6 +229,17 @@ key4 f 2 3 4 0 0 \N SELECT t1.*, t2.* from t1 LEFT JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); 1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 3))) ORDER BY ALL; +key1 a 1 1 2 key1 C 3 4 5 +key1 b 2 3 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 C 3 4 5 +key1 e 5 5 5 key1 C 3 4 5 +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 key4 F 1 1 1 SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); key1 a 1 1 2 key1 A 1 2 1 key1 a 1 1 2 key1 B 2 1 2 @@ -214,6 +269,16 @@ key1 c 3 2 1 key1 D 4 1 6 SELECT t1.*, t2.* from t1 INNER JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 INNER JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); 1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 3))) ORDER BY ALL; +key1 a 1 1 2 key1 C 3 4 5 +key1 b 2 3 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 C 3 4 5 +key1 e 5 5 5 key1 C 3 4 5 +key4 f 2 3 4 key4 F 1 1 1 SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) 
AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); 0 0 \N key3 a3 1 1 1 key1 a 1 1 2 key1 A 1 2 1 @@ -249,6 +314,17 @@ key1 c 3 2 1 key1 D 4 1 6 SELECT t1.*, t2.* from t1 RIGHT JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); 1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 3))) ORDER BY ALL; + 0 0 \N key3 a3 1 1 1 +key1 a 1 1 2 key1 C 3 4 5 +key1 b 2 3 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 C 3 4 5 +key1 e 5 5 5 key1 C 3 4 5 +key4 f 2 3 4 key4 F 1 1 1 SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); 0 0 \N key3 a3 1 1 1 key1 a 1 1 2 key1 A 1 2 1 @@ -293,6 +369,18 @@ key4 f 2 3 4 0 0 \N SELECT t1.*, t2.* from t1 FULL JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 FULL JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); 1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 3))) ORDER BY ALL; + 0 0 \N key3 a3 1 1 1 +key1 a 1 1 2 key1 C 3 4 5 +key1 b 2 3 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 C 3 4 5 +key1 e 5 5 5 key1 C 3 4 5 +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 key4 F 1 1 1 SET join_algorithm='hash'; SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY (t1.key, t1.attr, t2.key, 
t2.attr); key1 a 1 1 2 key1 A 1 2 1 diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 index d3aa74f5c38..a363101ca69 100644 --- a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 @@ -18,6 +18,7 @@ SELECT t1.*, t2.* from t1 {{ join_type }} JOIN t2 ON t1.key = t2.key and (t1.b + SELECT t1.*, t2.* from t1 {{ join_type }} JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); SELECT t1.*, t2.* from t1 {{ join_type }} JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 {{ join_type }} JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND (t1.a=2 OR (t2.a IN (SELECT a FROM t1 WHERE a = 3))) ORDER BY ALL; {% endfor -%} {% endfor -%} diff --git a/tests/queries/0_stateless/03013_json_key_ignore_case.reference b/tests/queries/0_stateless/03013_json_key_ignore_case.reference new file mode 100644 index 00000000000..54683d8fbc5 --- /dev/null +++ b/tests/queries/0_stateless/03013_json_key_ignore_case.reference @@ -0,0 +1,3 @@ +1 77328912 Ben +2 77328913 Jim +3 77328914 Bill diff --git a/tests/queries/0_stateless/03013_json_key_ignore_case.sh b/tests/queries/0_stateless/03013_json_key_ignore_case.sh new file mode 100755 index 00000000000..807e743b22a --- /dev/null +++ b/tests/queries/0_stateless/03013_json_key_ignore_case.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +# NOTE: this sh wrapper is required because of shell_config + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') + +cp "$CURDIR"/data_json/key_ignore_case.json $USER_FILES_PATH/ + +$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" +$CLICKHOUSE_CLIENT -q "create table test_tbl (id UInt16, reqid UInt32, name String) engine=MergeTree order by id" +$CLICKHOUSE_CLIENT -q "INSERT INTO test_tbl SELECT * FROM file('key_ignore_case.json', 'JSONEachRow') SETTINGS input_format_json_ignore_key_case=true" +$CLICKHOUSE_CLIENT -q "select * from test_tbl" +$CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file diff --git a/tests/queries/0_stateless/03015_parser_shortcut_lexer_errors.reference b/tests/queries/0_stateless/03015_parser_shortcut_lexer_errors.reference new file mode 100644 index 00000000000..f83d884fd78 --- /dev/null +++ b/tests/queries/0_stateless/03015_parser_shortcut_lexer_errors.reference @@ -0,0 +1 @@ +Syntax error diff --git a/tests/queries/0_stateless/03015_parser_shortcut_lexer_errors.sh b/tests/queries/0_stateless/03015_parser_shortcut_lexer_errors.sh new file mode 100755 index 00000000000..762201ed5fc --- /dev/null +++ b/tests/queries/0_stateless/03015_parser_shortcut_lexer_errors.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --query "SELECT((((((((((SELECT(((((((((SELECT((((((((((SELECT(((((((((SELECT((((((((((SELECT(((((((((SELECT 1+)))))))))))))))))))))))))))))))))))))))))))))))))))))))))'" 2>&1 | grep -o -F 'Syntax error' diff --git a/tests/queries/0_stateless/03022_highlight_digit_groups.sql b/tests/queries/0_stateless/03022_highlight_digit_groups.sql index c48a02e712f..8c371c409c4 100644 --- a/tests/queries/0_stateless/03022_highlight_digit_groups.sql +++ b/tests/queries/0_stateless/03022_highlight_digit_groups.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_display_footer_column_names=0; SET output_format_pretty_row_numbers = 0; SELECT exp10(number) * (number % 2 ? 1 : -1) FROM numbers(30) FORMAT PrettySpace SETTINGS output_format_pretty_color = 1; diff --git a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.expect b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.expect deleted file mode 100755 index de15a199132..00000000000 --- a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.expect +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/expect -f - -set basedir [file dirname $argv0] -set basename [file tail $argv0] -if {[info exists env(CLICKHOUSE_TMP)]} { - set CLICKHOUSE_TMP $env(CLICKHOUSE_TMP) -} else { - set CLICKHOUSE_TMP "." -} -exp_internal -f $CLICKHOUSE_TMP/$basename.debuglog 0 - -log_user 0 -set timeout 60 -match_max 100000 -set stty_init "rows 25 cols 120" - -expect_after { - -i $any_spawn_id eof { exp_continue } - -i $any_spawn_id timeout { exit 1 } -} - -spawn clickhouse-local -expect ":) " - -# Trivial SELECT with LIMIT from system.zeros shows progress bar. -send "SELECT * FROM system.zeros LIMIT 10000000 FORMAT Null SETTINGS max_execution_speed = 1000000, timeout_before_checking_execution_speed = 0, max_block_size = 128\r" -expect "Progress: " -expect "█" -send "\3" -expect "Query was cancelled." 
-expect ":) " - -send "SELECT * FROM system.zeros_mt LIMIT 10000000 FORMAT Null SETTINGS max_execution_speed = 1000000, timeout_before_checking_execution_speed = 0, max_block_size = 128\r" -expect "Progress: " -expect "█" -send "\3" -expect "Query was cancelled." -expect ":) " - -# As well as from generateRandom -send "SELECT * FROM generateRandom() LIMIT 10000000 FORMAT Null SETTINGS max_execution_speed = 1000000, timeout_before_checking_execution_speed = 0, max_block_size = 128\r" -expect "Progress: " -expect "█" -send "\3" -expect "Query was cancelled." -expect ":) " - -send "exit\r" -expect eof diff --git a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.reference b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.reference index e69de29bb2d..6ca5ae94f9a 100644 --- a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.reference +++ b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.reference @@ -0,0 +1,3 @@ +Matched +Matched +Matched diff --git a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh new file mode 100755 index 00000000000..500a12587a2 --- /dev/null +++ b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-random-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +function run_with_progress_and_match_total_rows() +{ + CURL_RESPONSE=$(echo "$1" | \ + ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&wait_end_of_query=1&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" --data-binary @- 2>&1) + + echo "$CURL_RESPONSE" | grep -q '"total_rows_to_read":"100"' && echo "Matched" || echo "Expected total_rows_to_read not found: ${CURL_RESPONSE}" +} + +run_with_progress_and_match_total_rows 'SELECT * FROM system.zeros LIMIT 100' +run_with_progress_and_match_total_rows 'SELECT * FROM system.zeros_mt LIMIT 100' +run_with_progress_and_match_total_rows "SELECT * FROM generateRandom('number UInt64') LIMIT 100" diff --git a/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql b/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql index da2a387e07c..ac64135b593 100644 --- a/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql +++ b/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql @@ -12,6 +12,7 @@ system stop distributed sends dist_in; create table dist_out as data engine=Distributed(test_shard_localhost, currentDatabase(), data); set prefer_localhost_replica=0; +SET optimize_trivial_insert_select = 1; insert into dist_in select number/100, number from system.numbers limit 1e6 settings max_memory_usage='20Mi'; system flush distributed dist_in; -- { serverError MEMORY_LIMIT_EXCEEDED } diff --git a/tests/queries/0_stateless/03033_set_index_in.sql b/tests/queries/0_stateless/03033_set_index_in.sql index ad42a576444..bc0676fc5ef 100644 --- a/tests/queries/0_stateless/03033_set_index_in.sql +++ b/tests/queries/0_stateless/03033_set_index_in.sql @@ -1,3 +1,5 @@ +SET optimize_trivial_insert_select = 1; + create table a (k UInt64, v UInt64, index i (v) type set(100) granularity 2) engine MergeTree order by k settings index_granularity=8192, index_granularity_bytes=1000000000, 
min_index_granularity_bytes=0; insert into a select number, intDiv(number, 4096) from numbers(1000000); select sum(1+ignore(*)) from a where indexHint(v in (20, 40)); @@ -6,4 +8,4 @@ select sum(1+ignore(*)) from a where indexHint(v in (select 20 union all select SELECT 1 FROM a PREWHERE v IN (SELECT 1) WHERE v IN (SELECT 2); select 1 from a where indexHint(indexHint(materialize(0))); -select sum(1+ignore(*)) from a where indexHint(indexHint(v in (20, 40))); \ No newline at end of file +select sum(1+ignore(*)) from a where indexHint(indexHint(v in (20, 40))); diff --git a/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference index 00740e6380f..80f4e309505 100644 --- a/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference +++ b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference @@ -33,10 +33,10 @@ Positions: 4 2 0 1 Actions: INPUT : 0 -> id UInt64 : 0 INPUT : 1 -> value String : 1 COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 - ALIAS id :: 0 -> __table1.id UInt64 : 3 - ALIAS value :: 1 -> __table1.value String : 0 - FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 - Positions: 1 3 0 + ALIAS id : 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 4 + FUNCTION equals(id :: 0, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 4 ReadFromMergeTree (default.test_table_1) Header: id UInt64 value String @@ -50,10 +50,10 @@ Positions: 4 2 0 1 Actions: INPUT : 0 -> id UInt64 : 0 INPUT : 1 -> value String : 1 COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 - ALIAS id :: 0 -> __table2.id UInt64 : 3 - ALIAS value :: 1 -> __table2.value String : 0 - FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 - Positions: 1 3 0 + ALIAS id : 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 4 + 
FUNCTION equals(id :: 0, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 4 ReadFromMergeTree (default.test_table_2) Header: id UInt64 value String @@ -100,10 +100,10 @@ Positions: 4 2 0 1 Actions: INPUT : 0 -> id UInt64 : 0 INPUT : 1 -> value String : 1 COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 - ALIAS id :: 0 -> __table1.id UInt64 : 3 - ALIAS value :: 1 -> __table1.value String : 0 - FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 - Positions: 1 3 0 + ALIAS id : 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 4 + FUNCTION equals(id :: 0, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 4 ReadFromMergeTree (default.test_table_1) Header: id UInt64 value String @@ -117,10 +117,10 @@ Positions: 4 2 0 1 Actions: INPUT : 0 -> id UInt64 : 0 INPUT : 1 -> value String : 1 COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 - ALIAS id :: 0 -> __table2.id UInt64 : 3 - ALIAS value :: 1 -> __table2.value String : 0 - FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 - Positions: 1 3 0 + ALIAS id : 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 4 + FUNCTION equals(id :: 0, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 4 ReadFromMergeTree (default.test_table_2) Header: id UInt64 value String @@ -168,12 +168,12 @@ Positions: 4 2 0 1 INPUT : 1 -> value String : 1 COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 2 COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 3 - ALIAS id :: 0 -> __table1.id UInt64 : 4 - ALIAS value :: 1 -> __table1.value String : 0 - FUNCTION equals(__table1.id : 4, 6_UInt8 :: 2) -> equals(__table1.id, 6_UInt8) UInt8 : 1 - FUNCTION equals(__table1.id : 4, 5_UInt8 :: 3) -> equals(__table1.id, 5_UInt8) UInt8 : 2 + ALIAS id : 0 -> __table1.id UInt64 : 4 + ALIAS value :: 1 -> __table1.value String : 5 + FUNCTION equals(id : 0, 6_UInt8 :: 2) -> equals(__table1.id, 6_UInt8) UInt8 : 1 + 
FUNCTION equals(id :: 0, 5_UInt8 :: 3) -> equals(__table1.id, 5_UInt8) UInt8 : 2 FUNCTION and(equals(__table1.id, 5_UInt8) :: 2, equals(__table1.id, 6_UInt8) :: 1) -> and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) UInt8 : 3 - Positions: 3 4 0 + Positions: 3 4 5 ReadFromMergeTree (default.test_table_1) Header: id UInt64 value String @@ -188,12 +188,12 @@ Positions: 4 2 0 1 INPUT : 1 -> value String : 1 COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 3 - ALIAS id :: 0 -> __table2.id UInt64 : 4 - ALIAS value :: 1 -> __table2.value String : 0 - FUNCTION equals(__table2.id : 4, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 - FUNCTION equals(__table2.id : 4, 6_UInt8 :: 3) -> equals(__table2.id, 6_UInt8) UInt8 : 2 + ALIAS id : 0 -> __table2.id UInt64 : 4 + ALIAS value :: 1 -> __table2.value String : 5 + FUNCTION equals(id : 0, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + FUNCTION equals(id :: 0, 6_UInt8 :: 3) -> equals(__table2.id, 6_UInt8) UInt8 : 2 FUNCTION and(equals(__table2.id, 6_UInt8) :: 2, equals(__table2.id, 5_UInt8) :: 1) -> and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) UInt8 : 3 - Positions: 3 4 0 + Positions: 3 4 5 ReadFromMergeTree (default.test_table_2) Header: id UInt64 value String @@ -237,10 +237,10 @@ Positions: 4 2 0 1 Actions: INPUT : 0 -> id UInt64 : 0 INPUT : 1 -> value String : 1 COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 - ALIAS id :: 0 -> __table1.id UInt64 : 3 - ALIAS value :: 1 -> __table1.value String : 0 - FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 - Positions: 1 3 0 + ALIAS id : 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 4 + FUNCTION equals(id :: 0, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 4 ReadFromMergeTree (default.test_table_1) Header: id UInt64 value String @@ -254,10 +254,10 @@ Positions: 4 2 0 1 Actions: INPUT : 0 -> id UInt64 : 0 INPUT : 1 
-> value String : 1 COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 - ALIAS id :: 0 -> __table2.id UInt64 : 3 - ALIAS value :: 1 -> __table2.value String : 0 - FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 - Positions: 1 3 0 + ALIAS id : 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 4 + FUNCTION equals(id :: 0, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 4 ReadFromMergeTree (default.test_table_2) Header: id UInt64 value String @@ -452,10 +452,10 @@ Positions: 4 2 0 1 Actions: INPUT : 0 -> id UInt64 : 0 INPUT : 1 -> value String : 1 COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 - ALIAS id :: 0 -> __table1.id UInt64 : 3 - ALIAS value :: 1 -> __table1.value String : 0 - FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 - Positions: 1 3 0 + ALIAS id : 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 4 + FUNCTION equals(id :: 0, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 4 ReadFromMergeTree (default.test_table_1) Header: id UInt64 value String @@ -469,10 +469,10 @@ Positions: 4 2 0 1 Actions: INPUT : 0 -> id UInt64 : 0 INPUT : 1 -> value String : 1 COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 - ALIAS id :: 0 -> __table2.id UInt64 : 3 - ALIAS value :: 1 -> __table2.value String : 0 - FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 - Positions: 1 3 0 + ALIAS id : 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 4 + FUNCTION equals(id :: 0, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 4 ReadFromMergeTree (default.test_table_2) Header: id UInt64 value String diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.reference b/tests/queries/0_stateless/03036_reading_s3_archives.reference index 36ced212a1b..eacf16d0295 100644 --- a/tests/queries/0_stateless/03036_reading_s3_archives.reference +++ 
b/tests/queries/0_stateless/03036_reading_s3_archives.reference @@ -1,52 +1,52 @@ -1 Str1 example1.csv test/03036_archive1.zip::example1.csv -2 Str2 example1.csv test/03036_archive1.zip::example1.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.zip::example3.csv -6 Str6 example3.csv test/03036_archive2.zip::example3.csv -3 Str3 example2.csv test/03036_archive1.zip::example2.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive1.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -1 Str1 example1.csv test/03036_archive1.zip::example1.csv -2 Str2 example1.csv test/03036_archive1.zip::example1.csv -3 Str3 example2.csv test/03036_archive1.zip::example2.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive1.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.zip::example3.csv -6 Str6 example3.csv test/03036_archive2.zip::example3.csv -1 Str1 example1.csv test/03036_archive1.tar::example1.csv -2 Str2 example1.csv test/03036_archive1.tar::example1.csv -7 Str7 example4.csv test/03036_archive1.tar::example4.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive1.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -9 Str9 example5.csv test/03036_archive2.tar::example5.csv -10 Str10 example5.csv test/03036_archive2.tar::example5.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -11 Str11 
example6.csv test/03036_archive3.tar.gz::example6.csv -12 Str12 example6.csv test/03036_archive3.tar.gz::example6.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -3 Str3 example2.csv test/03036_archive2.zip::example2.csv -4 Str4 example2.csv test/03036_archive2.zip::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -7 Str7 example4.csv test/03036_archive2.tar::example4.csv -8 Str8 example4.csv test/03036_archive2.tar::example4.csv -9 Str9 example5.csv test/03036_archive2.tar::example5.csv -10 Str10 example5.csv test/03036_archive2.tar::example5.csv -3 Str3 example2.csv test/03036_archive3.tar.gz::example2.csv -4 Str4 example2.csv test/03036_archive3.tar.gz::example2.csv -5 Str5 example3.csv test/03036_archive2.tar::example3.csv -6 Str6 example3.csv test/03036_archive2.tar::example3.csv -13 Str13 example7.csv test/03036_compressed_file_archive.zip::example7.csv -14 Str14 example7.csv test/03036_compressed_file_archive.zip::example7.csv +1 Str1 25 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 25 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 25 example3.csv test/03036_archive2.zip::example3.csv +3 Str3 25 example2.csv test/03036_archive1.zip::example2.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +1 Str1 25 example1.csv test/03036_archive1.zip::example1.csv +2 Str2 25 example1.csv test/03036_archive1.zip::example1.csv +3 Str3 25 example2.csv 
test/03036_archive1.zip::example2.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive1.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.zip::example3.csv +6 Str6 25 example3.csv test/03036_archive2.zip::example3.csv +1 Str1 25 example1.csv test/03036_archive1.tar::example1.csv +2 Str2 25 example1.csv test/03036_archive1.tar::example1.csv +7 Str7 25 example4.csv test/03036_archive1.tar::example4.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive1.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 27 example5.csv test/03036_archive2.tar::example5.csv +10 Str10 27 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +11 Str11 29 example6.csv test/03036_archive3.tar.gz::example6.csv +12 Str12 29 example6.csv test/03036_archive3.tar.gz::example6.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +3 Str3 25 example2.csv test/03036_archive2.zip::example2.csv +4 Str4 25 example2.csv test/03036_archive2.zip::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +7 Str7 25 example4.csv test/03036_archive2.tar::example4.csv +8 Str8 25 example4.csv test/03036_archive2.tar::example4.csv +9 Str9 27 
example5.csv test/03036_archive2.tar::example5.csv +10 Str10 27 example5.csv test/03036_archive2.tar::example5.csv +3 Str3 25 example2.csv test/03036_archive3.tar.gz::example2.csv +4 Str4 25 example2.csv test/03036_archive3.tar.gz::example2.csv +5 Str5 25 example3.csv test/03036_archive2.tar::example3.csv +6 Str6 25 example3.csv test/03036_archive2.tar::example3.csv +13 Str13 57 example7.csv test/03036_compressed_file_archive.zip::example7.csv +14 Str14 57 example7.csv test/03036_compressed_file_archive.zip::example7.csv diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.sql b/tests/queries/0_stateless/03036_reading_s3_archives.sql index 00d7cc25e1a..43bda4ee704 100644 --- a/tests/queries/0_stateless/03036_reading_s3_archives.sql +++ b/tests/queries/0_stateless/03036_reading_s3_archives.sql @@ -1,22 +1,22 @@ -- Tags: no-fasttest -- Tag no-fasttest: Depends on AWS -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.zip :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example2.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example*') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive1.tar :: example1.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar :: example4.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive2.tar :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar.gz :: example*.csv') ORDER BY (id, _file, _path); -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv') ORDER BY (id, 
_file, _path); -select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } -select id, data, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent{2..3}.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive1.zip :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive2.zip :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example2.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.zip :: example*') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive1.tar :: example1.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar :: example4.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive2.tar :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar.gz :: example*.csv') ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv') ORDER BY (id, _file, _path); +select id, data, _size, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } +select id, data, _size, _file, _path from s3(s3_conn, filename='03036_archive2.zip :: nonexistent{2..3}.csv'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } CREATE TABLE table_zip22 Engine S3(s3_conn, filename='03036_archive2.zip :: example2.csv'); -select id, data, _file, _path from table_zip22 ORDER BY (id, _file, _path); +select 
id, data, _size, _file, _path from table_zip22 ORDER BY (id, _file, _path); CREATE table table_tar2star Engine S3(s3_conn, filename='03036_archive2.tar :: example*.csv'); -SELECT id, data, _file, _path FROM table_tar2star ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM table_tar2star ORDER BY (id, _file, _path); CREATE table table_tarstarglobs Engine S3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv'); -SELECT id, data, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); +SELECT id, data, _size, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); CREATE table table_noexist Engine s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError UNKNOWN_STORAGE } -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) +SELECT id, data, _size, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh index 7c1ac41cfdc..887b2ed94d7 100755 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh @@ -8,7 +8,7 @@ CLICKHOUSE_LOG_COMMENT= . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --merge_max_block_size 8192 --merge_max_block_size_bytes=10485760 --index_granularity 8192" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" function test() { @@ -41,12 +41,12 @@ function test() $CH_CLIENT -q "drop table if exists test;" echo "MergeTree compact" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;" test $CH_CLIENT -q "drop table test;" echo "MergeTree wide" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;" test $CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh index 927ceac72b5..371ae87c2ef 100755 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh @@ -9,7 +9,7 @@ 
CLICKHOUSE_LOG_COMMENT= -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --merge_max_block_size 8192 --merge_max_block_size_bytes=10485760 --index_granularity 8192" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" function test() { echo "test" @@ -41,11 +41,11 @@ function test() $CH_CLIENT -q "drop table if exists test;" echo "MergeTree compact" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;" test $CH_CLIENT -q "drop table test;" echo "MergeTree wide" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;" test $CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03038_move_partition_to_oneself_deadlock.sql 
b/tests/queries/0_stateless/03038_move_partition_to_oneself_deadlock.sql index 6eefa5270c5..f3072fb3539 100644 --- a/tests/queries/0_stateless/03038_move_partition_to_oneself_deadlock.sql +++ b/tests/queries/0_stateless/03038_move_partition_to_oneself_deadlock.sql @@ -1,3 +1,5 @@ +SET optimize_trivial_insert_select = 1; + DROP TABLE IF EXISTS move_partition_to_oneself; CREATE TABLE move_partition_to_oneself (key UInt64 CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple(); INSERT INTO move_partition_to_oneself SELECT number FROM numbers(1e6); diff --git a/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh index ca313307a6d..60248f4453a 100755 --- a/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh +++ b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh @@ -7,6 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh + # Fix some settings to avoid timeouts because of some settings randomization CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" @@ -32,7 +33,7 @@ echo "MergeTree wide + horizontal merge" test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" echo "MergeTree compact + vertical merge" -test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" echo "MergeTree wide + vertical merge" -test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, 
vertical_merge_algorithm_min_columns_to_activate=1" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" diff --git a/tests/queries/0_stateless/03071_fix_short_circuit_logic.reference b/tests/queries/0_stateless/03071_fix_short_circuit_logic.reference new file mode 100644 index 00000000000..48aedfc3958 --- /dev/null +++ b/tests/queries/0_stateless/03071_fix_short_circuit_logic.reference @@ -0,0 +1 @@ +2024-01-02 16:54:59 diff --git a/tests/queries/0_stateless/03071_fix_short_circuit_logic.sql b/tests/queries/0_stateless/03071_fix_short_circuit_logic.sql new file mode 100644 index 00000000000..7745bceca0b --- /dev/null +++ b/tests/queries/0_stateless/03071_fix_short_circuit_logic.sql @@ -0,0 +1,62 @@ + + +CREATE FUNCTION IF NOT EXISTS unhexPrefixed AS value -> unhex(substring(value, 3)); +CREATE FUNCTION IF NOT EXISTS hex2bytes AS address -> CAST(unhexPrefixed(address), 'FixedString(20)'); +CREATE FUNCTION IF NOT EXISTS bytes2hex AS address -> concat('0x', lower(hex(address))); + +CREATE TABLE test +( + `transfer_id` String, + `address` FixedString(20), + `value` UInt256, + `block_timestamp` DateTime('UTC'), + `token_address` FixedString(20) +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(block_timestamp) +PRIMARY KEY (address, block_timestamp) +ORDER BY (address, block_timestamp); + +INSERT INTO test SELECT 'token-transfer-0x758f1bbabb160683e1c80ed52dcd24a32b599d40edf1cec91b5f1199c0e392a2-56', hex2bytes('0xd387a6e4e84a6c86bd90c158c6028a58cc8ac459'), 3000000000000000000000, '2024-01-02 16:54:59', 'abc'; + +CREATE TABLE token_data +( + token_address_hex String, + chain String, + is_blacklisted Bool +) +ENGINE = TinyLog; + +INSERT INTO token_data SELECT bytes2hex('abc'), 'zksync', false; + +CREATE DICTIONARY token_data_map +( + token_address_hex String, + chain String, + is_blacklisted Bool +) +PRIMARY KEY token_address_hex, chain +SOURCE(Clickhouse(table 
token_data)) +LIFETIME(MIN 200 MAX 300) +LAYOUT(COMPLEX_KEY_HASHED_ARRAY()); + +SELECT block_timestamp +FROM +( + SELECT + block_timestamp, + bytes2hex(token_address) AS token_address_hex + FROM + ( + SELECT + transfer_id, + address, + value, + block_timestamp, + token_address, + 'zksync' AS chain + FROM test + ) + WHERE (address = hex2bytes('0xd387a6e4e84a6c86bd90c158c6028a58cc8ac459')) AND (transfer_id NOT LIKE 'gas%') AND (value > 0) AND (dictGetOrDefault(token_data_map, 'is_blacklisted', (token_address_hex, 'zksync'), true)) +) +SETTINGS max_threads = 1, short_circuit_function_evaluation = 'enable', allow_experimental_analyzer = 0; \ No newline at end of file diff --git a/tests/queries/0_stateless/03094_one_thousand_joins.sql b/tests/queries/0_stateless/03094_one_thousand_joins.sql index ea159f0e4c0..1f6bd99df7f 100644 --- a/tests/queries/0_stateless/03094_one_thousand_joins.sql +++ b/tests/queries/0_stateless/03094_one_thousand_joins.sql @@ -1,6 +1,7 @@ -- Tags: no-fasttest, no-tsan, long -- (no-tsan because it has a small maximum stack size and the test would fail with TOO_DEEP_RECURSION) +SET join_algorithm = 'default'; -- for 'full_sorting_merge' the query is 10x slower SET allow_experimental_analyzer = 1; -- old analyzer returns TOO_DEEP_SUBQUERIES -- Bug 33446, marked as 'long' because it still runs around 10 sec diff --git a/tests/queries/0_stateless/03095_window_functions_qualify.reference b/tests/queries/0_stateless/03095_window_functions_qualify.reference index c74a212428b..9ffbe64f85e 100644 --- a/tests/queries/0_stateless/03095_window_functions_qualify.reference +++ b/tests/queries/0_stateless/03095_window_functions_qualify.reference @@ -48,8 +48,9 @@ Positions: 2 0 INPUT :: 1 -> count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)) UInt64 : 1 INPUT : 2 -> count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)) UInt64 : 2 COLUMN Const(UInt8) -> 4_UInt8 UInt8 : 3 - FUNCTION equals(count() OVER (PARTITION BY modulo(__table1.number, 
3_UInt8)) :: 2, 4_UInt8 :: 3) -> equals(count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)), 4_UInt8) UInt8 : 4 - Positions: 4 0 1 + INPUT :: 3 -> modulo(__table1.number, 3_UInt8) UInt8 : 4 + FUNCTION equals(count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)) :: 2, 4_UInt8 :: 3) -> equals(count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)), 4_UInt8) UInt8 : 5 + Positions: 5 0 1 Window (Window step for window \'PARTITION BY modulo(__table1.number, 3_UInt8)\') Header: modulo(__table1.number, 3_UInt8) UInt8 __table1.number UInt64 diff --git a/tests/queries/0_stateless/03130_convert_outer_join_to_inner_join.reference b/tests/queries/0_stateless/03130_convert_outer_join_to_inner_join.reference index 6ca5b6ef572..d35bdeff98b 100644 --- a/tests/queries/0_stateless/03130_convert_outer_join_to_inner_join.reference +++ b/tests/queries/0_stateless/03130_convert_outer_join_to_inner_join.reference @@ -25,13 +25,12 @@ Positions: 4 0 2 1 Header: __table1.id UInt64 __table1.value String Actions: INPUT : 1 -> value String : 0 - INPUT :: 0 -> __table1.id UInt64 : 1 - INPUT :: 2 -> id UInt64 : 2 - ALIAS value :: 0 -> __table1.value String : 3 - Positions: 1 3 + INPUT : 0 -> id UInt64 : 1 + ALIAS value :: 0 -> __table1.value String : 2 + ALIAS id :: 1 -> __table1.id UInt64 : 0 + Positions: 0 2 ReadFromMergeTree (default.test_table_1) - Header: __table1.id UInt64 - id UInt64 + Header: id UInt64 value String ReadType: Default Parts: 1 @@ -42,20 +41,18 @@ Positions: 4 0 2 1 Prewhere filter column: notEquals(__table1.id, 0_UInt8) (removed) Actions: INPUT : 0 -> id UInt64 : 0 COLUMN Const(UInt8) -> 0_UInt8 UInt8 : 1 - ALIAS id : 0 -> __table1.id UInt64 : 2 - FUNCTION notEquals(__table1.id : 2, 0_UInt8 :: 1) -> notEquals(__table1.id, 0_UInt8) UInt8 : 3 - Positions: 2 0 3 + FUNCTION notEquals(id : 0, 0_UInt8 :: 1) -> notEquals(__table1.id, 0_UInt8) UInt8 : 2 + Positions: 0 2 Expression Header: __table2.id UInt64 __table2.value String Actions: INPUT : 1 -> value 
String : 0 - INPUT :: 0 -> __table2.id UInt64 : 1 - INPUT :: 2 -> id UInt64 : 2 - ALIAS value :: 0 -> __table2.value String : 3 - Positions: 1 3 + INPUT : 0 -> id UInt64 : 1 + ALIAS value :: 0 -> __table2.value String : 2 + ALIAS id :: 1 -> __table2.id UInt64 : 0 + Positions: 0 2 ReadFromMergeTree (default.test_table_2) - Header: __table2.id UInt64 - id UInt64 + Header: id UInt64 value String ReadType: Default Parts: 1 @@ -66,9 +63,8 @@ Positions: 4 0 2 1 Prewhere filter column: notEquals(__table2.id, 0_UInt8) (removed) Actions: INPUT : 0 -> id UInt64 : 0 COLUMN Const(UInt8) -> 0_UInt8 UInt8 : 1 - ALIAS id : 0 -> __table2.id UInt64 : 2 - FUNCTION notEquals(__table2.id : 2, 0_UInt8 :: 1) -> notEquals(__table2.id, 0_UInt8) UInt8 : 3 - Positions: 2 0 3 + FUNCTION notEquals(id : 0, 0_UInt8 :: 1) -> notEquals(__table2.id, 0_UInt8) UInt8 : 2 + Positions: 0 2 -- 2 Value_2 2 Value_2 -- @@ -99,13 +95,12 @@ Positions: 4 0 2 1 Header: __table1.id UInt64 __table1.value String Actions: INPUT : 1 -> value String : 0 - INPUT :: 0 -> __table1.id UInt64 : 1 - INPUT :: 2 -> id UInt64 : 2 - ALIAS value :: 0 -> __table1.value String : 3 - Positions: 1 3 + INPUT : 0 -> id UInt64 : 1 + ALIAS value :: 0 -> __table1.value String : 2 + ALIAS id :: 1 -> __table1.id UInt64 : 0 + Positions: 0 2 ReadFromMergeTree (default.test_table_1) - Header: __table1.id UInt64 - id UInt64 + Header: id UInt64 value String ReadType: Default Parts: 1 @@ -116,20 +111,18 @@ Positions: 4 0 2 1 Prewhere filter column: notEquals(__table1.id, 0_UInt8) (removed) Actions: INPUT : 0 -> id UInt64 : 0 COLUMN Const(UInt8) -> 0_UInt8 UInt8 : 1 - ALIAS id : 0 -> __table1.id UInt64 : 2 - FUNCTION notEquals(__table1.id : 2, 0_UInt8 :: 1) -> notEquals(__table1.id, 0_UInt8) UInt8 : 3 - Positions: 2 0 3 + FUNCTION notEquals(id : 0, 0_UInt8 :: 1) -> notEquals(__table1.id, 0_UInt8) UInt8 : 2 + Positions: 0 2 Expression Header: __table2.id UInt64 __table2.value String Actions: INPUT : 1 -> value String : 0 - INPUT :: 0 -> 
__table2.id UInt64 : 1 - INPUT :: 2 -> id UInt64 : 2 - ALIAS value :: 0 -> __table2.value String : 3 - Positions: 1 3 + INPUT : 0 -> id UInt64 : 1 + ALIAS value :: 0 -> __table2.value String : 2 + ALIAS id :: 1 -> __table2.id UInt64 : 0 + Positions: 0 2 ReadFromMergeTree (default.test_table_2) - Header: __table2.id UInt64 - id UInt64 + Header: id UInt64 value String ReadType: Default Parts: 1 @@ -140,9 +133,8 @@ Positions: 4 0 2 1 Prewhere filter column: notEquals(__table2.id, 0_UInt8) (removed) Actions: INPUT : 0 -> id UInt64 : 0 COLUMN Const(UInt8) -> 0_UInt8 UInt8 : 1 - ALIAS id : 0 -> __table2.id UInt64 : 2 - FUNCTION notEquals(__table2.id : 2, 0_UInt8 :: 1) -> notEquals(__table2.id, 0_UInt8) UInt8 : 3 - Positions: 2 0 3 + FUNCTION notEquals(id : 0, 0_UInt8 :: 1) -> notEquals(__table2.id, 0_UInt8) UInt8 : 2 + Positions: 0 2 -- 2 Value_2 2 Value_2 -- @@ -173,13 +165,12 @@ Positions: 4 0 2 1 Header: __table1.id UInt64 __table1.value String Actions: INPUT : 1 -> value String : 0 - INPUT :: 0 -> __table1.id UInt64 : 1 - INPUT :: 2 -> id UInt64 : 2 - ALIAS value :: 0 -> __table1.value String : 3 - Positions: 1 3 + INPUT : 0 -> id UInt64 : 1 + ALIAS value :: 0 -> __table1.value String : 2 + ALIAS id :: 1 -> __table1.id UInt64 : 0 + Positions: 0 2 ReadFromMergeTree (default.test_table_1) - Header: __table1.id UInt64 - id UInt64 + Header: id UInt64 value String ReadType: Default Parts: 1 @@ -190,22 +181,20 @@ Positions: 4 0 2 1 Prewhere filter column: and(notEquals(__table1.id, 0_UInt8), notEquals(__table1.id, 0_UInt8)) (removed) Actions: INPUT : 0 -> id UInt64 : 0 COLUMN Const(UInt8) -> 0_UInt8 UInt8 : 1 - ALIAS id : 0 -> __table1.id UInt64 : 2 - FUNCTION notEquals(__table1.id : 2, 0_UInt8 : 1) -> notEquals(__table1.id, 0_UInt8) UInt8 : 3 - FUNCTION notEquals(__table1.id : 2, 0_UInt8 :: 1) -> notEquals(__table1.id, 0_UInt8) UInt8 : 4 - FUNCTION and(notEquals(__table1.id, 0_UInt8) :: 4, notEquals(__table1.id, 0_UInt8) :: 3) -> and(notEquals(__table1.id, 0_UInt8), 
notEquals(__table1.id, 0_UInt8)) UInt8 : 1 - Positions: 2 0 1 + FUNCTION notEquals(id : 0, 0_UInt8 : 1) -> notEquals(__table1.id, 0_UInt8) UInt8 : 2 + FUNCTION notEquals(id : 0, 0_UInt8 :: 1) -> notEquals(__table1.id, 0_UInt8) UInt8 : 3 + FUNCTION and(notEquals(__table1.id, 0_UInt8) :: 3, notEquals(__table1.id, 0_UInt8) :: 2) -> and(notEquals(__table1.id, 0_UInt8), notEquals(__table1.id, 0_UInt8)) UInt8 : 1 + Positions: 0 1 Expression Header: __table2.id UInt64 __table2.value String Actions: INPUT : 1 -> value String : 0 - INPUT :: 0 -> __table2.id UInt64 : 1 - INPUT :: 2 -> id UInt64 : 2 - ALIAS value :: 0 -> __table2.value String : 3 - Positions: 1 3 + INPUT : 0 -> id UInt64 : 1 + ALIAS value :: 0 -> __table2.value String : 2 + ALIAS id :: 1 -> __table2.id UInt64 : 0 + Positions: 0 2 ReadFromMergeTree (default.test_table_2) - Header: __table2.id UInt64 - id UInt64 + Header: id UInt64 value String ReadType: Default Parts: 1 @@ -216,10 +205,9 @@ Positions: 4 0 2 1 Prewhere filter column: and(notEquals(__table2.id, 0_UInt8), notEquals(__table2.id, 0_UInt8)) (removed) Actions: INPUT : 0 -> id UInt64 : 0 COLUMN Const(UInt8) -> 0_UInt8 UInt8 : 1 - ALIAS id : 0 -> __table2.id UInt64 : 2 - FUNCTION notEquals(__table2.id : 2, 0_UInt8 : 1) -> notEquals(__table2.id, 0_UInt8) UInt8 : 3 - FUNCTION notEquals(__table2.id : 2, 0_UInt8 :: 1) -> notEquals(__table2.id, 0_UInt8) UInt8 : 4 - FUNCTION and(notEquals(__table2.id, 0_UInt8) :: 4, notEquals(__table2.id, 0_UInt8) :: 3) -> and(notEquals(__table2.id, 0_UInt8), notEquals(__table2.id, 0_UInt8)) UInt8 : 1 - Positions: 2 0 1 + FUNCTION notEquals(id : 0, 0_UInt8 : 1) -> notEquals(__table2.id, 0_UInt8) UInt8 : 2 + FUNCTION notEquals(id : 0, 0_UInt8 :: 1) -> notEquals(__table2.id, 0_UInt8) UInt8 : 3 + FUNCTION and(notEquals(__table2.id, 0_UInt8) :: 3, notEquals(__table2.id, 0_UInt8) :: 2) -> and(notEquals(__table2.id, 0_UInt8), notEquals(__table2.id, 0_UInt8)) UInt8 : 1 + Positions: 0 1 -- 2 Value_2 2 Value_2 diff --git 
a/tests/queries/0_stateless/03135_keeper_client_find_commands.sh b/tests/queries/0_stateless/03135_keeper_client_find_commands.sh index 0f57694028d..43ffdec7346 100755 --- a/tests/queries/0_stateless/03135_keeper_client_find_commands.sh +++ b/tests/queries/0_stateless/03135_keeper_client_find_commands.sh @@ -6,24 +6,24 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) path="/test-keeper-client-$CLICKHOUSE_DATABASE" -$CLICKHOUSE_KEEPER_CLIENT -q "rm $path" >& /dev/null +$CLICKHOUSE_KEEPER_CLIENT -q "rm '$path'" >& /dev/null -$CLICKHOUSE_KEEPER_CLIENT -q "create $path 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/a 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/a/a 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/b 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/c 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d/a 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d/b 'foobar'" -$CLICKHOUSE_KEEPER_CLIENT -q "create $path/1/d/c 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/a' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/a/a' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/b' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/c' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/d' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/d/a' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/d/b' 'foobar'" +$CLICKHOUSE_KEEPER_CLIENT -q "create '$path/1/d/c' 'foobar'" echo 'find_super_nodes' $CLICKHOUSE_KEEPER_CLIENT -q "find_super_nodes 1000000000" -$CLICKHOUSE_KEEPER_CLIENT -q "find_super_nodes 3 $path" | sort +$CLICKHOUSE_KEEPER_CLIENT -q "find_super_nodes 3 '$path'" | sort echo 'find_big_family' -$CLICKHOUSE_KEEPER_CLIENT -q 
"find_big_family $path 3" +$CLICKHOUSE_KEEPER_CLIENT -q "find_big_family '$path' 3" -$CLICKHOUSE_KEEPER_CLIENT -q "rmr $path" +$CLICKHOUSE_KEEPER_CLIENT -q "rmr '$path'" diff --git a/tests/queries/0_stateless/03142_alter_comment_parameterized_view.reference b/tests/queries/0_stateless/03142_alter_comment_parameterized_view.reference new file mode 100644 index 00000000000..9b93c75ea56 --- /dev/null +++ b/tests/queries/0_stateless/03142_alter_comment_parameterized_view.reference @@ -0,0 +1 @@ +CREATE VIEW default.test_table_comment AS (SELECT toString({date_from:String})) COMMENT \'test comment\' diff --git a/tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql b/tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql new file mode 100644 index 00000000000..98318e99e4a --- /dev/null +++ b/tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql @@ -0,0 +1,5 @@ +DROP TABLE IF EXISTS test_table_comment; +CREATE VIEW test_table_comment AS SELECT toString({date_from:String}); +ALTER TABLE test_table_comment MODIFY COMMENT 'test comment'; +SELECT create_table_query FROM system.tables WHERE name = 'test_table_comment' AND database = currentDatabase(); +DROP TABLE test_table_comment; diff --git a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference index 864f62d3113..6d375fd471a 100644 --- a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference +++ b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference @@ -1,5 +1,10 @@ ┏━━━┓ ┃ x ┃ ┡━━━┩ -1. │ █ │ +1. │ █ │ └───┘ + ┏━━━━━━━━━┳━━━━━━━━━━┓ + ┃ 'Hello' ┃ x ┃ + ┡━━━━━━━━━╇━━━━━━━━━━┩ +1. 
│ Hello │ █ test █ │ + └─────────┴──────────┘ diff --git a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql index e37b0db08e9..49f689a4cc5 100644 --- a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql +++ b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql @@ -1 +1,2 @@ -SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 0) AS x FORMAT Pretty; +SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x FORMAT Pretty; +SELECT 'Hello', format('\x1b[38;2;{0};{1};{2}m█\x1b[0m test \x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x FORMAT Pretty; diff --git a/tests/queries/0_stateless/03143_prewhere_profile_events.sh b/tests/queries/0_stateless/03143_prewhere_profile_events.sh index 863fcc1fe01..00daa0fe7cc 100755 --- a/tests/queries/0_stateless/03143_prewhere_profile_events.sh +++ b/tests/queries/0_stateless/03143_prewhere_profile_events.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-random-merge-tree-settings +# Tags: no-random-settings, no-random-merge-tree-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -nq " DROP TABLE IF EXISTS t; - CREATE TABLE t(a UInt32, b UInt32, c UInt32, d UInt32) ENGINE=MergeTree ORDER BY a SETTINGS min_bytes_for_wide_part=1, min_rows_for_wide_part=1; + CREATE TABLE t(a UInt32, b UInt32, c UInt32, d UInt32) ENGINE=MergeTree ORDER BY a SETTINGS min_bytes_for_wide_part=0, min_rows_for_wide_part=0; INSERT INTO t SELECT number, number, number, number FROM numbers_mt(1e7); diff --git a/tests/queries/0_stateless/03146_parameterized_view_with_date.reference b/tests/queries/0_stateless/03146_parameterized_view_with_date.reference new file mode 100644 index 00000000000..1d6227dbbcb --- /dev/null +++ 
b/tests/queries/0_stateless/03146_parameterized_view_with_date.reference @@ -0,0 +1 @@ +2 2024-04-01 01:00:00 diff --git a/tests/queries/0_stateless/03146_parameterized_view_with_date.sql b/tests/queries/0_stateless/03146_parameterized_view_with_date.sql new file mode 100644 index 00000000000..2cfadb70b24 --- /dev/null +++ b/tests/queries/0_stateless/03146_parameterized_view_with_date.sql @@ -0,0 +1,14 @@ + +drop table if exists table_pv; +create table table_pv (id Int32, timestamp_field DateTime) engine = Memory(); + +insert into table_pv values(1, '2024-03-01 00:00:00'); +insert into table_pv values (2, '2024-04-01 01:00:00'); + +create view pv as select * from table_pv where timestamp_field > {timestamp_param:DateTime}; + +select * from pv (timestamp_param=toDateTime('2024-04-01 00:00:01')); + +select * from pv (timestamp_param=toDateTime('2024-040')); -- { serverError CANNOT_PARSE_DATETIME } + +drop table table_pv; diff --git a/tests/queries/0_stateless/03155_analyzer_interpolate.reference b/tests/queries/0_stateless/03155_analyzer_interpolate.reference index 791aaa5b2a2..eade3b45d26 100644 --- a/tests/queries/0_stateless/03155_analyzer_interpolate.reference +++ b/tests/queries/0_stateless/03155_analyzer_interpolate.reference @@ -11,3 +11,8 @@ 5 [5] 5.5 [5] 7 [7] +2 +100500 +18 +26 +34 diff --git a/tests/queries/0_stateless/03155_analyzer_interpolate.sql b/tests/queries/0_stateless/03155_analyzer_interpolate.sql index b3c1d233f47..30423cb86ff 100644 --- a/tests/queries/0_stateless/03155_analyzer_interpolate.sql +++ b/tests/queries/0_stateless/03155_analyzer_interpolate.sql @@ -10,3 +10,6 @@ SELECT n, number+5 AS inter FROM ( -- { serverError NOT_AN_AGGREGATE } SELECT toFloat32(number % 10) AS n, number, number*2 AS mn FROM numbers(10) WHERE number % 3 = 1 ) GROUP BY n, inter ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS mn * 2); + +-- https://github.com/ClickHouse/ClickHouse/issues/64636 +select sum(number) as s from remote('127.0.0.{1,2}', 
numbers(10)) where (intDiv(number, 2) as key) != 1 group by key order by key with fill interpolate (s as 100500); diff --git a/tests/queries/0_stateless/03155_test_move_to_prewhere.reference b/tests/queries/0_stateless/03155_test_move_to_prewhere.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/03155_test_move_to_prewhere.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/03155_test_move_to_prewhere.sh b/tests/queries/0_stateless/03155_test_move_to_prewhere.sh new file mode 100755 index 00000000000..b6980b3a23a --- /dev/null +++ b/tests/queries/0_stateless/03155_test_move_to_prewhere.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -nq " + CREATE TABLE event_envoy + ( + timestamp_interval DateTime CODEC(DoubleDelta), + region LowCardinality(String), + cluster LowCardinality(String) + ) + ENGINE = MergeTree + ORDER BY (timestamp_interval) + SETTINGS index_granularity = 8192; + + INSERT INTO event_envoy SELECT now() - number, 'us-east-1', 'ch_super_fast' FROM numbers_mt(1e5); +" + +${CLICKHOUSE_CLIENT} -nq " + CREATE TABLE event_envoy_remote + ( + timestamp_interval DateTime CODEC(DoubleDelta), + region LowCardinality(String), + cluster LowCardinality(String) + ) AS remote('127.0.0.1', '${CLICKHOUSE_DATABASE}', event_envoy); +" + +${CLICKHOUSE_CLIENT} -q " + CREATE TABLE global_event_envoy + ( + timestamp_interval DateTime, + region LowCardinality(String), + cluster LowCardinality(String) + ) + ENGINE = Merge('${CLICKHOUSE_DATABASE}', 'event_envoy.*'); +" + +${CLICKHOUSE_CLIENT} --prefer_localhost_replica 1 -q " + EXPLAIN indexes=1 + SELECT timestamp_interval + FROM global_event_envoy + WHERE timestamp_interval <= now() - 54321 AND region = 'us-east-1' +" | grep -c 'Condition.*timestamp_interval' + diff --git 
a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference index b5b2aec9c12..18830a293bd 100644 --- a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference +++ b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference @@ -10,3 +10,5 @@ Hello 1 Hello 1 Hello 2 Hello 2 +2020-01-01 a 2 +2020-01-01 b 4 diff --git a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql index f605a369822..55f9877b2ac 100644 --- a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql +++ b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql @@ -8,3 +8,21 @@ SELECT s, arr, a FROM remote('127.0.0.{1,2}', currentDatabase(), arrays_test) AR SELECT s, arr FROM remote('127.0.0.2', currentDatabase(), arrays_test) ARRAY JOIN arr WHERE arr < 3 ORDER BY arr; SELECT s, arr FROM remote('127.0.0.{1,2}', currentDatabase(), arrays_test) ARRAY JOIN arr WHERE arr < 3 ORDER BY arr; + +create table hourly( + hour datetime, + `metric.names` Array(String), + `metric.values` Array(Int64) +) Engine=Memory +as select '2020-01-01', ['a', 'b'], [1,2]; + +SELECT + toDate(hour) AS day, + `metric.names`, + sum(`metric.values`) +FROM remote('127.0.0.{1,2}', currentDatabase(), hourly) +ARRAY JOIN metric +GROUP BY + day, + metric.names +ORDER BY metric.names; diff --git a/tests/queries/0_stateless/03156_group_concat.reference b/tests/queries/0_stateless/03156_group_concat.reference new file mode 100644 index 00000000000..c1ab35e96c0 --- /dev/null +++ b/tests/queries/0_stateless/03156_group_concat.reference @@ -0,0 +1,19 @@ +0 95 abc [1,2,3] +1 \N a [993,986,979,972] +2 123 makson95 [] +95123 +abcamakson95 +[1,2,3][993,986,979,972][] +[1,2,3] +abcamakson95 +95123 +95\n123 +95,123 +abc,a,makson95 +[1,2,3],[993,986,979,972] +\N +951239512395123 
+abc,a,makson95,abc,a,makson95,abc,a,makson95 +[1,2,3][993,986,979,972][][1,2,3][993,986,979,972][][1,2,3][993,986,979,972][] +488890 +488890 diff --git a/tests/queries/0_stateless/03156_group_concat.sql b/tests/queries/0_stateless/03156_group_concat.sql new file mode 100644 index 00000000000..0d561c69f0a --- /dev/null +++ b/tests/queries/0_stateless/03156_group_concat.sql @@ -0,0 +1,57 @@ +DROP TABLE IF EXISTS test_groupConcat; +CREATE TABLE test_groupConcat +( + id UInt64, + p_int Int32 NULL, + p_string String, + p_array Array(Int32) +) ENGINE = MergeTree ORDER BY id; + +SET max_insert_threads = 1, max_threads = 1, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; +INSERT INTO test_groupConcat VALUES (0, 95, 'abc', [1, 2, 3]), (1, NULL, 'a', [993, 986, 979, 972]), (2, 123, 'makson95', []); + +SELECT * FROM test_groupConcat; + +SELECT groupConcat(p_int) FROM test_groupConcat; +SELECT groupConcat(p_string) FROM test_groupConcat; +SELECT groupConcat(p_array) FROM test_groupConcat; + +SELECT groupConcat('', 1)(p_array) FROM test_groupConcat; +SELECT groupConcat('', 3)(p_string) FROM test_groupConcat; +SELECT groupConcat('', 2)(p_int) FROM test_groupConcat; +SELECT groupConcat('\n', 3)(p_int) FROM test_groupConcat; + +SELECT groupConcat(',')(p_int) FROM test_groupConcat; +SELECT groupConcat(',')(p_string) FROM test_groupConcat; +SELECT groupConcat(',', 2)(p_array) FROM test_groupConcat; + +SELECT groupConcat(p_int) FROM test_groupConcat WHERE id = 1; + +INSERT INTO test_groupConcat VALUES (0, 95, 'abc', [1, 2, 3]), (1, NULL, 'a', [993, 986, 979, 972]), (2, 123, 'makson95', []); +INSERT INTO test_groupConcat VALUES (0, 95, 'abc', [1, 2, 3]), (1, NULL, 'a', [993, 986, 979, 972]), (2, 123, 'makson95', []); + +SELECT groupConcat(p_int) FROM test_groupConcat; +SELECT groupConcat(',')(p_string) FROM test_groupConcat; +SELECT groupConcat(p_array) FROM test_groupConcat; + +SELECT groupConcat(123)(number) FROM numbers(10); -- { serverError 
ILLEGAL_TYPE_OF_ARGUMENT } +SELECT groupConcat(',', '3')(number) FROM numbers(10); -- { serverError BAD_ARGUMENTS } +SELECT groupConcat(',', 0)(number) FROM numbers(10); -- { serverError BAD_ARGUMENTS } +SELECT groupConcat(',', -1)(number) FROM numbers(10); -- { serverError BAD_ARGUMENTS } +SELECT groupConcat(',', 3, 3)(number) FROM numbers(10); -- { serverError TOO_MANY_ARGUMENTS_FOR_FUNCTION } + +SELECT length(groupConcat(number)) FROM numbers(100000); + +DROP TABLE IF EXISTS test_groupConcat; + +CREATE TABLE test_groupConcat +( + id UInt64, + p_int Int32, +) ENGINE = MergeTree ORDER BY id; + +INSERT INTO test_groupConcat SELECT number, number FROM numbers(100000) SETTINGS min_insert_block_size_rows = 2000; + +SELECT length(groupConcat(p_int)) FROM test_groupConcat; + +DROP TABLE IF EXISTS test_groupConcat; diff --git a/tests/queries/0_stateless/03156_nullable_number_tips.sql b/tests/queries/0_stateless/03156_nullable_number_tips.sql index e6f2fa36d86..9a494e3292b 100644 --- a/tests/queries/0_stateless/03156_nullable_number_tips.sql +++ b/tests/queries/0_stateless/03156_nullable_number_tips.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_display_footer_column_names=0; SELECT 123456789 AS x FORMAT PrettyCompact; SELECT toNullable(123456789) AS x FORMAT PrettyCompact; SELECT toLowCardinality(toNullable(123456789)) AS x FORMAT PrettyCompact; diff --git a/tests/queries/0_stateless/03160_pretty_format_tty.sh b/tests/queries/0_stateless/03160_pretty_format_tty.sh index bbc4b96eb90..200bd52f3fa 100755 --- a/tests/queries/0_stateless/03160_pretty_format_tty.sh +++ b/tests/queries/0_stateless/03160_pretty_format_tty.sh @@ -5,4 +5,4 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CUR_DIR"/../shell_config.sh # default output_format_pretty_max_rows is 10K -$CLICKHOUSE_LOCAL -q "select * from numbers(100e3) format PrettySpace settings max_threads=1" | wc -l +$CLICKHOUSE_LOCAL -q "select * from numbers(100e3) format PrettySpace settings max_threads=1, output_format_pretty_display_footer_column_names=0" | wc -l diff --git a/tests/queries/0_stateless/03164_adapting_parquet_reader_output_size.reference b/tests/queries/0_stateless/03164_adapting_parquet_reader_output_size.reference new file mode 100644 index 00000000000..ef9b07ba955 --- /dev/null +++ b/tests/queries/0_stateless/03164_adapting_parquet_reader_output_size.reference @@ -0,0 +1,4 @@ +65409 +16 +128 +2363 diff --git a/tests/queries/0_stateless/03164_adapting_parquet_reader_output_size.sql b/tests/queries/0_stateless/03164_adapting_parquet_reader_output_size.sql new file mode 100644 index 00000000000..fa098b64702 --- /dev/null +++ b/tests/queries/0_stateless/03164_adapting_parquet_reader_output_size.sql @@ -0,0 +1,25 @@ +-- Tags: no-fasttest, no-parallel, no-random-settings + +set max_insert_threads=1; + +DROP TABLE IF EXISTS test_parquet; +CREATE TABLE test_parquet (col1 String, col2 String, col3 String, col4 String, col5 String, col6 String, col7 String) ENGINE=File(Parquet); +INSERT INTO test_parquet SELECT rand(),rand(),rand(),rand(),rand(),rand(),rand() FROM numbers(100000); +SELECT max(blockSize()) FROM test_parquet; + +DROP TABLE IF EXISTS test_parquet; +CREATE TABLE test_parquet (col1 String, col2 String, col3 String, col4 String, col5 String, col6 String, col7 String) ENGINE=File(Parquet) settings input_format_parquet_max_block_size=16; +INSERT INTO test_parquet SELECT rand(),rand(),rand(),rand(),rand(),rand(),rand() FROM numbers(100000); +SELECT max(blockSize()) FROM test_parquet; + +DROP TABLE IF EXISTS test_parquet; +CREATE TABLE test_parquet (col1 String, col2 String, col3 String, col4 String, col5 String, col6 String, col7 String) ENGINE=File(Parquet) settings 
input_format_parquet_prefer_block_bytes=30; +INSERT INTO test_parquet SELECT rand(),rand(),rand(),rand(),rand(),rand(),rand() FROM numbers(100000); +SELECT max(blockSize()) FROM test_parquet; + +DROP TABLE IF EXISTS test_parquet; +CREATE TABLE test_parquet (col1 String, col2 String, col3 String, col4 String, col5 String, col6 String, col7 String) ENGINE=File(Parquet) settings input_format_parquet_prefer_block_bytes=30720; +INSERT INTO test_parquet SELECT rand(),rand(),rand(),rand(),rand(),rand(),rand() FROM numbers(100000); +SELECT max(blockSize()) FROM test_parquet; + +DROP TABLE IF EXISTS test_parquet; \ No newline at end of file diff --git a/tests/queries/0_stateless/03164_early_constant_folding_analyzer.reference b/tests/queries/0_stateless/03164_early_constant_folding_analyzer.reference new file mode 100644 index 00000000000..227b118bb7f --- /dev/null +++ b/tests/queries/0_stateless/03164_early_constant_folding_analyzer.reference @@ -0,0 +1 @@ +ReadFromPreparedSource (Optimized trivial count) diff --git a/tests/queries/0_stateless/03164_early_constant_folding_analyzer.sql b/tests/queries/0_stateless/03164_early_constant_folding_analyzer.sql new file mode 100644 index 00000000000..dbffbc1af71 --- /dev/null +++ b/tests/queries/0_stateless/03164_early_constant_folding_analyzer.sql @@ -0,0 +1,30 @@ +CREATE TABLE checks +( + `pull_request_number` UInt32, + `commit_sha` LowCardinality(String), + `check_name` LowCardinality(String), + `check_status` LowCardinality(String), + `check_duration_ms` UInt64, + `check_start_time` DateTime, + `test_name` LowCardinality(String), + `test_status` LowCardinality(String), + `test_duration_ms` UInt64, + `report_url` String, + `pull_request_url` String, + `commit_url` String, + `task_url` String, + `base_ref` String, + `base_repo` String, + `head_ref` String, + `head_repo` String, + `test_context_raw` String, + `instance_type` LowCardinality(String), + `instance_id` String, + `date` Date MATERIALIZED toDate(check_start_time) +) 
+ENGINE = MergeTree ORDER BY (date, pull_request_number, commit_sha, check_name, test_name, check_start_time); + +insert into checks select * from generateRandom() limit 1; + + +select trimLeft(explain) from (explain SELECT count(1) FROM checks WHERE test_name IS NOT NULL) where explain like '%ReadFromPreparedSource%' SETTINGS allow_experimental_analyzer = 1, allow_experimental_parallel_reading_from_replicas = 0; diff --git a/tests/queries/0_stateless/03164_linestring_geometry.reference b/tests/queries/0_stateless/03164_linestring_geometry.reference new file mode 100644 index 00000000000..1f68df04614 --- /dev/null +++ b/tests/queries/0_stateless/03164_linestring_geometry.reference @@ -0,0 +1,11 @@ +-- { echoOn } +SELECT readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)'); +[(1,1),(2,2),(3,3),(1,1)] +SELECT toTypeName(readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)')); +LineString +SELECT wkt(readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)')); +LINESTRING(1 1,2 2,3 3,1 1) +-- Native Array(Tuple(Float64, Float64)) is treated as Ring, not as LineString. +WITH wkt(CAST([(1, 1), (2, 2), (3, 3)], 'Array(Tuple(Float64, Float64))')) as x +SELECT x, toTypeName(x), readWKTRing(x) as y, toTypeName(y); +POLYGON((1 1,2 2,3 3)) String [(1,1),(2,2),(3,3)] Ring diff --git a/tests/queries/0_stateless/03164_linestring_geometry.sql b/tests/queries/0_stateless/03164_linestring_geometry.sql new file mode 100644 index 00000000000..e4f1d1295e7 --- /dev/null +++ b/tests/queries/0_stateless/03164_linestring_geometry.sql @@ -0,0 +1,8 @@ +-- { echoOn } +SELECT readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)'); +SELECT toTypeName(readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)')); +SELECT wkt(readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)')); + +-- Native Array(Tuple(Float64, Float64)) is treated as Ring, not as LineString.
+WITH wkt(CAST([(1, 1), (2, 2), (3, 3)], 'Array(Tuple(Float64, Float64))')) as x +SELECT x, toTypeName(x), readWKTRing(x) as y, toTypeName(y); diff --git a/tests/queries/0_stateless/03164_materialize_statistics.reference b/tests/queries/0_stateless/03164_materialize_statistics.reference index c209d2e8b63..5e969cf41cb 100644 --- a/tests/queries/0_stateless/03164_materialize_statistics.reference +++ b/tests/queries/0_stateless/03164_materialize_statistics.reference @@ -1,10 +1,10 @@ 10 10 10 -statistic not used Condition less(b, 10_UInt8) moved to PREWHERE -statistic not used Condition less(a, 10_UInt8) moved to PREWHERE -statistic used after merge Condition less(a, 10_UInt8) moved to PREWHERE -statistic used after merge Condition less(b, 10_UInt8) moved to PREWHERE -statistic used after materialize Condition less(a, 10_UInt8) moved to PREWHERE -statistic used after materialize Condition less(b, 10_UInt8) moved to PREWHERE +statistics not used Condition less(b, 10_UInt8) moved to PREWHERE +statistics not used Condition less(a, 10_UInt8) moved to PREWHERE +statistics used after merge Condition less(a, 10_UInt8) moved to PREWHERE +statistics used after merge Condition less(b, 10_UInt8) moved to PREWHERE +statistics used after materialize Condition less(a, 10_UInt8) moved to PREWHERE +statistics used after materialize Condition less(b, 10_UInt8) moved to PREWHERE 2 0 diff --git a/tests/queries/0_stateless/03164_materialize_statistics.sql b/tests/queries/0_stateless/03164_materialize_statistics.sql index 763644d16ab..43c5724dd59 100644 --- a/tests/queries/0_stateless/03164_materialize_statistics.sql +++ b/tests/queries/0_stateless/03164_materialize_statistics.sql @@ -1,34 +1,34 @@ -DROP TABLE IF EXISTS t_statistic_materialize; +DROP TABLE IF EXISTS t_statistics_materialize; SET allow_experimental_analyzer = 1; -SET allow_experimental_statistic = 1; -SET allow_statistic_optimize = 1; +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; SET 
materialize_statistics_on_insert = 0; -CREATE TABLE t_statistic_materialize +CREATE TABLE t_statistics_materialize ( - a Int64 STATISTIC(tdigest), - b Int16 STATISTIC(tdigest), + a Int64 STATISTICS(tdigest), + b Int16 STATISTICS(tdigest), ) ENGINE = MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. -INSERT INTO t_statistic_materialize SELECT number, -number FROM system.numbers LIMIT 10000; +INSERT INTO t_statistics_materialize SELECT number, -number FROM system.numbers LIMIT 10000; -SELECT count(*) FROM t_statistic_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistic not used'; +SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; -OPTIMIZE TABLE t_statistic_materialize FINAL; +OPTIMIZE TABLE t_statistics_materialize FINAL; -SELECT count(*) FROM t_statistic_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistic used after merge'; +SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; -TRUNCATE TABLE t_statistic_materialize; +TRUNCATE TABLE t_statistics_materialize; SET mutations_sync = 2; -INSERT INTO t_statistic_materialize SELECT number, -number FROM system.numbers LIMIT 10000; -ALTER TABLE t_statistic_materialize MATERIALIZE STATISTIC a, b TYPE tdigest; +INSERT INTO t_statistics_materialize SELECT number, -number FROM system.numbers LIMIT 10000; +ALTER TABLE t_statistics_materialize MATERIALIZE STATISTICS a, b; -SELECT count(*) FROM t_statistic_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistic used after materialize'; +SELECT count(*) FROM t_statistics_materialize WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; -DROP TABLE t_statistic_materialize; +DROP TABLE t_statistics_materialize; SYSTEM FLUSH LOGS; @@ -36,7 +36,7 @@ SELECT 
log_comment, message FROM system.text_log JOIN ( SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log WHERE current_database = currentDatabase() - AND query LIKE 'SELECT count(*) FROM t_statistic_materialize%' + AND query LIKE 'SELECT count(*) FROM t_statistics_materialize%' AND type = 'QueryFinish' ) AS query_log USING (query_id) WHERE message LIKE '%moved to PREWHERE%' @@ -45,5 +45,5 @@ ORDER BY event_time_microseconds; SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) FROM system.query_log WHERE current_database = currentDatabase() - AND query LIKE 'INSERT INTO t_statistic_materialize SELECT%' + AND query LIKE 'INSERT INTO t_statistics_materialize SELECT%' AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/03164_orc_signedness.reference b/tests/queries/0_stateless/03164_orc_signedness.reference new file mode 100644 index 00000000000..3ee822a94c1 --- /dev/null +++ b/tests/queries/0_stateless/03164_orc_signedness.reference @@ -0,0 +1,41 @@ +-- { echoOn } +select x from file('i8.orc') where indexHint(x = -128); +-128 +select x from file('i8.orc') where indexHint(x = 128); +select x from file('u8.orc') where indexHint(x = -128); +-128 +select x from file('u8.orc') where indexHint(x = 128); +select x from file('i16.orc') where indexHint(x = -32768); +-32768 +select x from file('i16.orc') where indexHint(x = 32768); +select x from file('u16.orc') where indexHint(x = -32768); +-32768 +select x from file('u16.orc') where indexHint(x = 32768); +select x from file('i32.orc') where indexHint(x = -2147483648); +-2147483648 +select x from file('i32.orc') where indexHint(x = 2147483648); +select x from file('u32.orc') where indexHint(x = -2147483648); +-2147483648 +select x from file('u32.orc') where indexHint(x = 2147483648); +select x from file('i64.orc') where indexHint(x = -9223372036854775808); +-9223372036854775808 +select x from file('i64.orc') where indexHint(x = 9223372036854775808); 
+-9223372036854775808 +select x from file('u64.orc') where indexHint(x = -9223372036854775808); +-9223372036854775808 +select x from file('u64.orc') where indexHint(x = 9223372036854775808); +-9223372036854775808 +select x from file('u8.orc', ORC, 'x UInt8') where indexHint(x > 10); +128 +select x from file('u8.orc', ORC, 'x UInt64') where indexHint(x > 10); +18446744073709551488 +select x from file('u16.orc', ORC, 'x UInt16') where indexHint(x > 10); +32768 +select x from file('u16.orc', ORC, 'x UInt64') where indexHint(x > 10); +18446744073709518848 +select x from file('u32.orc', ORC, 'x UInt32') where indexHint(x > 10); +2147483648 +select x from file('u32.orc', ORC, 'x UInt64') where indexHint(x > 10); +18446744071562067968 +select x from file('u64.orc', ORC, 'x UInt64') where indexHint(x > 10); +9223372036854775808 diff --git a/tests/queries/0_stateless/03164_orc_signedness.sql b/tests/queries/0_stateless/03164_orc_signedness.sql new file mode 100644 index 00000000000..ae2d0428ca5 --- /dev/null +++ b/tests/queries/0_stateless/03164_orc_signedness.sql @@ -0,0 +1,42 @@ +-- Tags: no-fasttest, no-parallel + +set input_format_orc_filter_push_down = 1; +set engine_file_truncate_on_insert = 1; + +insert into function file('i8.orc') select materialize(-128)::Int8 as x; +insert into function file('u8.orc') select materialize(128)::UInt8 as x; +insert into function file('i16.orc') select materialize(-32768)::Int16 as x; +insert into function file('u16.orc') select materialize(32768)::UInt16 as x; +insert into function file('i32.orc') select materialize(-2147483648)::Int32 as x; +insert into function file('u32.orc') select materialize(2147483648)::UInt32 as x; +insert into function file('i64.orc') select materialize(-9223372036854775808)::Int64 as x; +insert into function file('u64.orc') select materialize(9223372036854775808)::UInt64 as x; + +-- { echoOn } +select x from file('i8.orc') where indexHint(x = -128); +select x from file('i8.orc') where indexHint(x = 128); 
+select x from file('u8.orc') where indexHint(x = -128); +select x from file('u8.orc') where indexHint(x = 128); + +select x from file('i16.orc') where indexHint(x = -32768); +select x from file('i16.orc') where indexHint(x = 32768); +select x from file('u16.orc') where indexHint(x = -32768); +select x from file('u16.orc') where indexHint(x = 32768); + +select x from file('i32.orc') where indexHint(x = -2147483648); +select x from file('i32.orc') where indexHint(x = 2147483648); +select x from file('u32.orc') where indexHint(x = -2147483648); +select x from file('u32.orc') where indexHint(x = 2147483648); + +select x from file('i64.orc') where indexHint(x = -9223372036854775808); +select x from file('i64.orc') where indexHint(x = 9223372036854775808); +select x from file('u64.orc') where indexHint(x = -9223372036854775808); +select x from file('u64.orc') where indexHint(x = 9223372036854775808); + +select x from file('u8.orc', ORC, 'x UInt8') where indexHint(x > 10); +select x from file('u8.orc', ORC, 'x UInt64') where indexHint(x > 10); +select x from file('u16.orc', ORC, 'x UInt16') where indexHint(x > 10); +select x from file('u16.orc', ORC, 'x UInt64') where indexHint(x > 10); +select x from file('u32.orc', ORC, 'x UInt32') where indexHint(x > 10); +select x from file('u32.orc', ORC, 'x UInt64') where indexHint(x > 10); +select x from file('u64.orc', ORC, 'x UInt64') where indexHint(x > 10); diff --git a/tests/queries/0_stateless/03164_parallel_replicas_range_filter_min_max.reference b/tests/queries/0_stateless/03164_parallel_replicas_range_filter_min_max.reference new file mode 100644 index 00000000000..4dded9eda81 --- /dev/null +++ b/tests/queries/0_stateless/03164_parallel_replicas_range_filter_min_max.reference @@ -0,0 +1,10 @@ +10 +10 +10 +10 +10 +10 +10 +10 +13 +4 diff --git a/tests/queries/0_stateless/03164_parallel_replicas_range_filter_min_max.sql b/tests/queries/0_stateless/03164_parallel_replicas_range_filter_min_max.sql new file mode 100644 index 
00000000000..58143395e44 --- /dev/null +++ b/tests/queries/0_stateless/03164_parallel_replicas_range_filter_min_max.sql @@ -0,0 +1,125 @@ +DROP TABLE IF EXISTS range_filter_custom_range_test; + +CREATE TABLE range_filter_custom_range_test (k UInt64) ENGINE=MergeTree ORDER BY k; + +INSERT INTO range_filter_custom_range_test SELECT number + 5 from numbers(10); + +SELECT count() +FROM +( + SELECT * + FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), range_filter_custom_range_test) + SETTINGS prefer_localhost_replica = 0, max_parallel_replicas = 3, distributed_group_by_no_merge = 0, + parallel_replicas_custom_key = 'k', parallel_replicas_custom_key_filter_type = 'range', + parallel_replicas_custom_key_range_lower = 5, parallel_replicas_custom_key_range_upper = 15 +); + +SELECT count() +FROM +( + SELECT * + FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), range_filter_custom_range_test) + SETTINGS prefer_localhost_replica = 0, max_parallel_replicas = 3, distributed_group_by_no_merge = 0, + parallel_replicas_custom_key = 'k', parallel_replicas_custom_key_filter_type = 'range', + parallel_replicas_custom_key_range_lower = 4, parallel_replicas_custom_key_range_upper = 14 +); + +SELECT count() +FROM +( + SELECT * + FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), range_filter_custom_range_test) + SETTINGS prefer_localhost_replica = 0, max_parallel_replicas = 3, distributed_group_by_no_merge = 0, + parallel_replicas_custom_key = 'k', parallel_replicas_custom_key_filter_type = 'range', + parallel_replicas_custom_key_range_lower = 6, parallel_replicas_custom_key_range_upper = 17 +); + + +SELECT count() +FROM +( + SELECT * + FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), range_filter_custom_range_test) + SETTINGS prefer_localhost_replica = 0, max_parallel_replicas = 3, distributed_group_by_no_merge = 0, + parallel_replicas_custom_key = 'k', 
parallel_replicas_custom_key_filter_type = 'range', + parallel_replicas_custom_key_range_lower = 0, parallel_replicas_custom_key_range_upper = 15 +); + +SELECT count() +FROM +( + SELECT * + FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), range_filter_custom_range_test) + SETTINGS prefer_localhost_replica = 0, max_parallel_replicas = 3, distributed_group_by_no_merge = 0, + parallel_replicas_custom_key = 'k', parallel_replicas_custom_key_filter_type = 'range', + parallel_replicas_custom_key_range_lower = 15, parallel_replicas_custom_key_range_upper = 25 +); + +SELECT count() +FROM +( + SELECT * + FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), range_filter_custom_range_test) + SETTINGS prefer_localhost_replica = 0, max_parallel_replicas = 3, distributed_group_by_no_merge = 0, + parallel_replicas_custom_key = 'k', parallel_replicas_custom_key_filter_type = 'range', + parallel_replicas_custom_key_range_lower = 0, parallel_replicas_custom_key_range_upper = 5 +); + +SELECT count() +FROM +( + SELECT * + FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), range_filter_custom_range_test) + SETTINGS prefer_localhost_replica = 0, max_parallel_replicas = 3, distributed_group_by_no_merge = 0, + parallel_replicas_custom_key = 'k', parallel_replicas_custom_key_filter_type = 'range', + parallel_replicas_custom_key_range_lower = 500, parallel_replicas_custom_key_range_upper = 10000 +); + + +SELECT count() +FROM +( + SELECT * + FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), range_filter_custom_range_test) + SETTINGS prefer_localhost_replica = 0, max_parallel_replicas = 3, distributed_group_by_no_merge = 0, + parallel_replicas_custom_key = 'k', parallel_replicas_custom_key_filter_type = 'range', + parallel_replicas_custom_key_range_lower = 10, parallel_replicas_custom_key_range_upper = 13 +); + +DROP TABLE range_filter_custom_range_test; + +DROP TABLE IF 
EXISTS range_filter_custom_range_test_2; + +CREATE TABLE range_filter_custom_range_test_2 (k UInt64) ENGINE=MergeTree ORDER BY k; + +INSERT INTO range_filter_custom_range_test_2 SELECT number from numbers(13); + +SELECT count() +FROM +( + SELECT * + FROM cluster(parallel_replicas, currentDatabase(), range_filter_custom_range_test_2) + SETTINGS prefer_localhost_replica = 0, max_parallel_replicas = 12, distributed_group_by_no_merge = 0, + parallel_replicas_custom_key = 'k', parallel_replicas_custom_key_filter_type = 'range', + parallel_replicas_custom_key_range_lower = 0, parallel_replicas_custom_key_range_upper = 13 +); + +DROP TABLE range_filter_custom_range_test_2; + +DROP TABLE IF EXISTS range_filter_custom_range_test_3; + +CREATE TABLE range_filter_custom_range_test_3 (k UInt64) ENGINE=MergeTree ORDER BY k; + +INSERT INTO range_filter_custom_range_test_3 SELECT number from numbers(4); + +SELECT count() +FROM +( + SELECT * + FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), range_filter_custom_range_test_3) + SETTINGS prefer_localhost_replica = 0, max_parallel_replicas = 12, distributed_group_by_no_merge = 0, + parallel_replicas_custom_key = 'k', parallel_replicas_custom_key_filter_type = 'range', + parallel_replicas_custom_key_range_lower = 0, parallel_replicas_custom_key_range_upper = 4 +); + +DROP TABLE range_filter_custom_range_test_3; \ No newline at end of file diff --git a/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.reference b/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.reference new file mode 100644 index 00000000000..1526555f6c8 --- /dev/null +++ b/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.reference @@ -0,0 +1,8 @@ +selects_with_pk_usage +0 +selects_with_pk_usage +0 +selects_with_pk_usage +1 +selects_with_pk_usage +1 diff --git a/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh 
b/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh new file mode 100755 index 00000000000..29d4c877909 --- /dev/null +++ b/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash + +# Tests profile event "SelectQueriesWithPrimaryKeyUsage" + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +table_id="$(random_str 10)" + +$CLICKHOUSE_CLIENT -q " + DROP TABLE IF EXISTS table_$table_id;" + +$CLICKHOUSE_CLIENT -q " + CREATE TABLE table_$table_id ( + pk Int64, + col1 Int64, + col2 Int64, + INDEX idx(col2) TYPE minmax + ) ENGINE = MergeTree ORDER BY pk PARTITION BY (pk % 2);"; + +$CLICKHOUSE_CLIENT -q " + ALTER TABLE table_$table_id ADD PROJECTION proj (SELECT * ORDER BY col1);" + +# Populate two partitions with 50k rows each. Each partition has >1 granules. +# We want SelectQueriesWithPrimaryKeyUsage to increase by +1 in each query, not by +1 per partition or by +1 per granule. +$CLICKHOUSE_CLIENT -q " + INSERT INTO table_$table_id SELECT number, number, number FROM numbers(100000);" + +# Run SELECTs + +# -- No filter +query_id="$(random_str 10)" +$CLICKHOUSE_CLIENT --query_id "$query_id" -q " + SELECT count(*) FROM table_$table_id FORMAT Null;" +$CLICKHOUSE_CLIENT -mn -q " + SYSTEM FLUSH LOGS; + SELECT + ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage + FROM + system.query_log + WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query_id = '$query_id' + FORMAT TSVWithNames; +" + +# -- Filter on non-PK column. However, it has a minmax-index defined. We expect the profile event to not increase.
+query_id="$(random_str 10)" +$CLICKHOUSE_CLIENT --query_id "$query_id" -q " + SELECT count(*) FROM table_$table_id WHERE col2 >= 50000 FORMAT Null;" +$CLICKHOUSE_CLIENT -mn -q " + SYSTEM FLUSH LOGS; + SELECT + ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage + FROM + system.query_log + WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query_id = '$query_id' + FORMAT TSVWithNames; +" + +# Filter on PK +query_id="$(random_str 10)" +$CLICKHOUSE_CLIENT --query_id "$query_id" -q " + SELECT count(*) FROM table_$table_id WHERE pk >= 50000 FORMAT Null;" +$CLICKHOUSE_CLIENT -mn -q " + SYSTEM FLUSH LOGS; + SELECT + ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage + FROM + system.query_log + WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query_id = '$query_id' + FORMAT TSVWithNames; +" + +# Filter on PK in projection +query_id="$(random_str 10)" +$CLICKHOUSE_CLIENT --query_id "$query_id" -q " + SELECT count(*) FROM table_$table_id WHERE col1 >= 50000 FORMAT Null;" +$CLICKHOUSE_CLIENT -mn -q " + SYSTEM FLUSH LOGS; + SELECT + ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage + FROM + system.query_log + WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query_id = '$query_id' + FORMAT TSVWithNames; +" + +$CLICKHOUSE_CLIENT -q " + DROP TABLE table_$table_id;" diff --git a/tests/queries/0_stateless/03165_order_by_duplicate.reference b/tests/queries/0_stateless/03165_order_by_duplicate.reference new file mode 100644 index 00000000000..5d5e7a33f4a --- /dev/null +++ b/tests/queries/0_stateless/03165_order_by_duplicate.reference @@ -0,0 +1,39 @@ +QUERY id: 0 + PROJECTION COLUMNS + id UInt64 + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.test, final: 1 + WHERE + FUNCTION id: 4, function_name: in, 
function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + QUERY id: 6, is_subquery: 1, is_distinct: 1 + PROJECTION COLUMNS + id UInt64 + PROJECTION + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: id, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test, final: 1 + ORDER BY + LIST id: 10, nodes: 1 + SORT id: 11, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 8, column_name: id, result_type: UInt64, source_id: 9 + LIMIT + CONSTANT id: 12, constant_value: UInt64_4, constant_value_type: UInt64 + ORDER BY + LIST id: 13, nodes: 1 + SORT id: 14, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + LIMIT BY LIMIT + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt64 + LIMIT BY + LIST id: 16, nodes: 1 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/03165_order_by_duplicate.sql b/tests/queries/0_stateless/03165_order_by_duplicate.sql new file mode 100644 index 00000000000..0054cbc36a6 --- /dev/null +++ b/tests/queries/0_stateless/03165_order_by_duplicate.sql @@ -0,0 +1,16 @@ +CREATE TABLE test +ENGINE = ReplacingMergeTree +PRIMARY KEY id +AS SELECT number AS id FROM numbers(100); + +EXPLAIN QUERY TREE SELECT id +FROM test FINAL +WHERE id IN ( + SELECT DISTINCT id + FROM test FINAL + ORDER BY id ASC + LIMIT 4 +) +ORDER BY id ASC +LIMIT 1 BY id +SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/03165_round_scale_as_column.reference b/tests/queries/0_stateless/03165_round_scale_as_column.reference new file mode 100644 index 00000000000..9ad25ed466a --- /dev/null +++ b/tests/queries/0_stateless/03165_round_scale_as_column.reference @@ -0,0 +1,2165 @@ +0 0 0 0 0 0 +1 1 1 1 1 1 +2 2 2 2 2 2 +3 3 3 3 3 3 +4 
4 4 4 4 4 +5 5 5 5 5 5 +6 6 6 6 6 6 +7 7 7 7 7 7 +8 8 8 8 8 8 +9 9 9 9 9 9 +10 10 10 10 10 10 +11 11 11 11 11 11 +12 12 12 12 12 12 +13 13 13 13 13 13 +14 14 14 14 14 14 +15 15 15 15 15 15 +16 16 16 16 16 16 +17 17 17 17 17 17 +18 18 18 18 18 18 +19 19 19 19 19 19 +0 0 0 0 0 0 +1 1 1 1 1 1 +2 2 2 2 2 2 +3 3 3 3 3 3 +4 4 4 4 4 4 +5 5 5 5 5 5 +6 6 6 6 6 6 +7 7 7 7 7 7 +8 8 8 8 8 8 +9 9 9 9 9 9 +10 10 10 10 10 10 +11 11 11 11 11 11 +12 12 12 12 12 12 +13 13 13 13 13 13 +14 14 14 14 14 14 +15 15 15 15 15 15 +16 16 16 16 16 16 +17 17 17 17 17 17 +18 18 18 18 18 18 +19 19 19 19 19 19 +0 0 0 0 0 0 +1 1 1 1 1 1 +2 2 2 2 2 2 +3 3 3 3 3 3 +4 4 4 4 4 4 +5 5 5 5 5 5 +6 6 6 6 6 6 +7 7 7 7 7 7 +8 8 8 8 8 8 +9 9 9 9 9 9 +10 10 10 10 10 10 +11 11 11 11 11 11 +12 12 12 12 12 12 +13 13 13 13 13 13 +14 14 14 14 14 14 +15 15 15 15 15 15 +16 16 16 16 16 16 +17 17 17 17 17 17 +18 18 18 18 18 18 +19 19 19 19 19 19 +0 0 0 0 0 0 +1 1 1 1 1 1 +2 2 2 2 2 2 +3 3 3 3 3 3 +4 4 4 4 4 4 +5 5 5 5 5 5 +6 6 6 6 6 6 +7 7 7 7 7 7 +8 8 8 8 8 8 +9 9 9 9 9 9 +10 10 10 10 10 10 +11 11 11 11 11 11 +12 12 12 12 12 12 +13 13 13 13 13 13 +14 14 14 14 14 14 +15 15 15 15 15 15 +16 16 16 16 16 16 +17 17 17 17 17 17 +18 18 18 18 18 18 +19 19 19 19 19 19 +-10 -10 -10 -10 -10 -10 +-9 -9 -9 -9 -9 -9 +-8 -8 -8 -8 -8 -8 +-7 -7 -7 -7 -7 -7 +-6 -6 -6 -6 -6 -6 +-5 -5 -5 -5 -5 -5 +-4 -4 -4 -4 -4 -4 +-3 -3 -3 -3 -3 -3 +-2 -2 -2 -2 -2 -2 +-1 -1 -1 -1 -1 -1 +0 0 0 0 0 0 +1 1 1 1 1 1 +2 2 2 2 2 2 +3 3 3 3 3 3 +4 4 4 4 4 4 +5 5 5 5 5 5 +6 6 6 6 6 6 +7 7 7 7 7 7 +8 8 8 8 8 8 +9 9 9 9 9 9 +-10 -10 -10 -10 -10 -10 +-9 -9 -9 -9 -9 -9 +-8 -8 -8 -8 -8 -8 +-7 -7 -7 -7 -7 -7 +-6 -6 -6 -6 -6 -6 +-5 -5 -5 -5 -5 -5 +-4 -4 -4 -4 -4 -4 +-3 -3 -3 -3 -3 -3 +-2 -2 -2 -2 -2 -2 +-1 -1 -1 -1 -1 -1 +0 0 0 0 0 0 +1 1 1 1 1 1 +2 2 2 2 2 2 +3 3 3 3 3 3 +4 4 4 4 4 4 +5 5 5 5 5 5 +6 6 6 6 6 6 +7 7 7 7 7 7 +8 8 8 8 8 8 +9 9 9 9 9 9 +-10 -10 -10 -10 -10 -10 +-9 -9 -9 -9 -9 -9 +-8 -8 -8 -8 -8 -8 +-7 -7 -7 -7 -7 -7 +-6 -6 -6 -6 -6 -6 +-5 -5 -5 -5 -5 -5 
+-4 -4 -4 -4 -4 -4 +-3 -3 -3 -3 -3 -3 +-2 -2 -2 -2 -2 -2 +-1 -1 -1 -1 -1 -1 +0 0 0 0 0 0 +1 1 1 1 1 1 +2 2 2 2 2 2 +3 3 3 3 3 3 +4 4 4 4 4 4 +5 5 5 5 5 5 +6 6 6 6 6 6 +7 7 7 7 7 7 +8 8 8 8 8 8 +9 9 9 9 9 9 +-10 -10 -10 -10 -10 -10 +-9 -9 -9 -9 -9 -9 +-8 -8 -8 -8 -8 -8 +-7 -7 -7 -7 -7 -7 +-6 -6 -6 -6 -6 -6 +-5 -5 -5 -5 -5 -5 +-4 -4 -4 -4 -4 -4 +-3 -3 -3 -3 -3 -3 +-2 -2 -2 -2 -2 -2 +-1 -1 -1 -1 -1 -1 +0 0 0 0 0 0 +1 1 1 1 1 1 +2 2 2 2 2 2 +3 3 3 3 3 3 +4 4 4 4 4 4 +5 5 5 5 5 5 +6 6 6 6 6 6 +7 7 7 7 7 7 +8 8 8 8 8 8 +9 9 9 9 9 9 +-10 -10 -10 -10 -10 -10 +-9 -9 -9 -9 -9 -9 +-8 -8 -8 -8 -8 -8 +-7 -7 -7 -7 -7 -7 +-6 -6 -6 -6 -6 -6 +-5 -5 -5 -5 -5 -5 +-4 -4 -4 -4 -4 -4 +-3 -3 -3 -3 -3 -3 +-2 -2 -2 -2 -2 -2 +-1 -1 -1 -1 -1 -1 +0 0 0 0 0 0 +1 1 1 1 1 1 +2 2 2 2 2 2 +3 3 3 3 3 3 +4 4 4 4 4 4 +5 5 5 5 5 5 +6 6 6 6 6 6 +7 7 7 7 7 7 +8 8 8 8 8 8 +9 9 9 9 9 9 +-10 -10 -10 -10 -10 -10 +-9 -9 -9 -9 -9 -9 +-8 -8 -8 -8 -8 -8 +-7 -7 -7 -7 -7 -7 +-6 -6 -6 -6 -6 -6 +-5 -5 -5 -5 -5 -5 +-4 -4 -4 -4 -4 -4 +-3 -3 -3 -3 -3 -3 +-2 -2 -2 -2 -2 -2 +-1 -1 -1 -1 -1 -1 +0 0 0 0 0 0 +1 1 1 1 1 1 +2 2 2 2 2 2 +3 3 3 3 3 3 +4 4 4 4 4 4 +5 5 5 5 5 5 +6 6 6 6 6 6 +7 7 7 7 7 7 +8 8 8 8 8 8 +9 9 9 9 9 9 +-1 -1 -1 -1 -1 -1 +-0.9 -1 -1 -1 -0 -0 +-0.8 -1 -1 -1 -0 -0 +-0.7 -1 -1 -1 -0 -0 +-0.6 -1 -1 -1 -0 -0 +-0.5 -0 -0 -1 -0 -0 +-0.4 -0 -0 -1 -0 -0 +-0.3 -0 -0 -1 -0 -0 +-0.2 -0 -0 -1 -0 -0 +-0.1 -0 -0 -1 -0 -0 +0 0 0 0 0 0 +0.1 0 0 0 1 0 +0.2 0 0 0 1 0 +0.3 0 0 0 1 0 +0.4 0 0 0 1 0 +0.5 0 0 0 1 0 +0.6 1 1 0 1 0 +0.7 1 1 0 1 0 +0.8 1 1 0 1 0 +0.9 1 1 0 1 0 +-1 -1 -1 -1 -1 -1 +-0.9 -1 -1 -1 -0 -0 +-0.8 -1 -1 -1 -0 -0 +-0.7 -1 -1 -1 -0 -0 +-0.6 -1 -1 -1 -0 -0 +-0.5 -0 -0 -1 -0 -0 +-0.4 -0 -0 -1 -0 -0 +-0.3 -0 -0 -1 -0 -0 +-0.2 -0 -0 -1 -0 -0 +-0.1 -0 -0 -1 -0 -0 +0 0 0 0 0 0 +0.1 0 0 0 1 0 +0.2 0 0 0 1 0 +0.3 0 0 0 1 0 +0.4 0 0 0 1 0 +0.5 0 0 0 1 0 +0.6 1 1 0 1 0 +0.7 1 1 0 1 0 +0.8 1 1 0 1 0 +0.9 1 1 0 1 0 +-1 -1 -1 -1 -1 -1 +-0.9 -0.9 -0.9 -0.9 -0.9 -0.9 +-0.8 -0.8 -0.8 -0.8 -0.8 -0.8 +-0.7 -0.7 -0.7 -0.7 
-0.7 -0.7 +-0.6 -0.6 -0.6 -0.6 -0.6 -0.6 +-0.5 -0.5 -0.5 -0.5 -0.5 -0.5 +-0.4 -0.4 -0.4 -0.4 -0.4 -0.4 +-0.3 -0.3 -0.3 -0.3 -0.3 -0.3 +-0.2 -0.2 -0.2 -0.2 -0.2 -0.2 +-0.1 -0.1 -0.1 -0.1 -0.1 -0.1 +0 0 0 0 0 0 +0.1 0.1 0.1 0.1 0.1 0.1 +0.2 0.2 0.2 0.2 0.2 0.2 +0.3 0.3 0.3 0.3 0.3 0.3 +0.4 0.4 0.4 0.4 0.4 0.4 +0.5 0.5 0.5 0.5 0.5 0.5 +0.6 0.6 0.6 0.6 0.6 0.6 +0.7 0.7 0.7 0.7 0.7 0.7 +0.8 0.8 0.8 0.8 0.8 0.8 +0.9 0.9 0.9 0.9 0.9 0.9 +-1 -1 -1 -1 -1 -1 +-0.9 -0.9 -0.9 -0.9 -0.9 -0.9 +-0.8 -0.8 -0.8 -0.8 -0.8 -0.8 +-0.7 -0.7 -0.7 -0.7 -0.7 -0.7 +-0.6 -0.6 -0.6 -0.6 -0.6 -0.6 +-0.5 -0.5 -0.5 -0.5 -0.5 -0.5 +-0.4 -0.4 -0.4 -0.4 -0.4 -0.4 +-0.3 -0.3 -0.3 -0.3 -0.3 -0.3 +-0.2 -0.2 -0.2 -0.2 -0.2 -0.2 +-0.1 -0.1 -0.1 -0.1 -0.1 -0.1 +0 0 0 0 0 0 +0.1 0.1 0.1 0.1 0.1 0.1 +0.2 0.2 0.2 0.2 0.2 0.2 +0.3 0.3 0.3 0.3 0.3 0.3 +0.4 0.4 0.4 0.4 0.4 0.4 +0.5 0.5 0.5 0.5 0.5 0.5 +0.6 0.6 0.6 0.6 0.6 0.6 +0.7 0.7 0.7 0.7 0.7 0.7 +0.8 0.8 0.8 0.8 0.8 0.8 +0.9 0.9 0.9 0.9 0.9 0.9 +0 0 0 0 0 0 +1 0 0 0 10 0 +2 0 0 0 10 0 +3 0 0 0 10 0 +4 0 0 0 10 0 +5 10 0 0 10 0 +6 10 10 0 10 0 +7 10 10 0 10 0 +8 10 10 0 10 0 +9 10 10 0 10 0 +10 10 10 10 10 10 +11 10 10 10 20 10 +12 10 10 10 20 10 +13 10 10 10 20 10 +14 10 10 10 20 10 +15 20 20 10 20 10 +16 20 20 10 20 10 +17 20 20 10 20 10 +18 20 20 10 20 10 +19 20 20 10 20 10 +0 0 0 0 0 0 +1 0 0 0 10 0 +2 0 0 0 10 0 +3 0 0 0 10 0 +4 0 0 0 10 0 +5 10 0 0 10 0 +6 10 10 0 10 0 +7 10 10 0 10 0 +8 10 10 0 10 0 +9 10 10 0 10 0 +10 10 10 10 10 10 +11 10 10 10 20 10 +12 10 10 10 20 10 +13 10 10 10 20 10 +14 10 10 10 20 10 +15 20 20 10 20 10 +16 20 20 10 20 10 +17 20 20 10 20 10 +18 20 20 10 20 10 +19 20 20 10 20 10 +0 0 0 0 0 0 +1 0 0 0 10 0 +2 0 0 0 10 0 +3 0 0 0 10 0 +4 0 0 0 10 0 +5 10 0 0 10 0 +6 10 10 0 10 0 +7 10 10 0 10 0 +8 10 10 0 10 0 +9 10 10 0 10 0 +10 10 10 10 10 10 +11 10 10 10 20 10 +12 10 10 10 20 10 +13 10 10 10 20 10 +14 10 10 10 20 10 +15 20 20 10 20 10 +16 20 20 10 20 10 +17 20 20 10 20 10 +18 20 20 10 20 10 +19 20 20 10 20 10 +0 0 0 0 0 0 +1 
0 0 0 10 0 +2 0 0 0 10 0 +3 0 0 0 10 0 +4 0 0 0 10 0 +5 10 0 0 10 0 +6 10 10 0 10 0 +7 10 10 0 10 0 +8 10 10 0 10 0 +9 10 10 0 10 0 +10 10 10 10 10 10 +11 10 10 10 20 10 +12 10 10 10 20 10 +13 10 10 10 20 10 +14 10 10 10 20 10 +15 20 20 10 20 10 +16 20 20 10 20 10 +17 20 20 10 20 10 +18 20 20 10 20 10 +19 20 20 10 20 10 +-10 -10 -10 -10 -10 -10 +-9 -10 -10 -10 0 0 +-8 -10 -10 -10 0 0 +-7 -10 -10 -10 0 0 +-6 -10 -10 -10 0 0 +-5 -10 0 -10 0 0 +-4 0 0 -10 0 0 +-3 0 0 -10 0 0 +-2 0 0 -10 0 0 +-1 0 0 -10 0 0 +0 0 0 0 0 0 +1 0 0 0 10 0 +2 0 0 0 10 0 +3 0 0 0 10 0 +4 0 0 0 10 0 +5 10 0 0 10 0 +6 10 10 0 10 0 +7 10 10 0 10 0 +8 10 10 0 10 0 +9 10 10 0 10 0 +-10 -10 -10 -10 -10 -10 +-9 -10 -10 -10 0 0 +-8 -10 -10 -10 0 0 +-7 -10 -10 -10 0 0 +-6 -10 -10 -10 0 0 +-5 -10 0 -10 0 0 +-4 0 0 -10 0 0 +-3 0 0 -10 0 0 +-2 0 0 -10 0 0 +-1 0 0 -10 0 0 +0 0 0 0 0 0 +1 0 0 0 10 0 +2 0 0 0 10 0 +3 0 0 0 10 0 +4 0 0 0 10 0 +5 10 0 0 10 0 +6 10 10 0 10 0 +7 10 10 0 10 0 +8 10 10 0 10 0 +9 10 10 0 10 0 +-10 -10 -10 -10 -10 -10 +-9 -10 -10 -10 0 0 +-8 -10 -10 -10 0 0 +-7 -10 -10 -10 0 0 +-6 -10 -10 -10 0 0 +-5 -10 0 -10 0 0 +-4 0 0 -10 0 0 +-3 0 0 -10 0 0 +-2 0 0 -10 0 0 +-1 0 0 -10 0 0 +0 0 0 0 0 0 +1 0 0 0 10 0 +2 0 0 0 10 0 +3 0 0 0 10 0 +4 0 0 0 10 0 +5 10 0 0 10 0 +6 10 10 0 10 0 +7 10 10 0 10 0 +8 10 10 0 10 0 +9 10 10 0 10 0 +-10 -10 -10 -10 -10 -10 +-9 -10 -10 -10 0 0 +-8 -10 -10 -10 0 0 +-7 -10 -10 -10 0 0 +-6 -10 -10 -10 0 0 +-5 -10 0 -10 0 0 +-4 0 0 -10 0 0 +-3 0 0 -10 0 0 +-2 0 0 -10 0 0 +-1 0 0 -10 0 0 +0 0 0 0 0 0 +1 0 0 0 10 0 +2 0 0 0 10 0 +3 0 0 0 10 0 +4 0 0 0 10 0 +5 10 0 0 10 0 +6 10 10 0 10 0 +7 10 10 0 10 0 +8 10 10 0 10 0 +9 10 10 0 10 0 +-10 -10 -10 -10 -10 -10 +-9 -10 -10 -10 -0 -0 +-8 -10 -10 -10 -0 -0 +-7 -10 -10 -10 -0 -0 +-6 -10 -10 -10 -0 -0 +-5 -0 -0 -10 -0 -0 +-4 -0 -0 -10 -0 -0 +-3 -0 -0 -10 -0 -0 +-2 -0 -0 -10 -0 -0 +-1 -0 -0 -10 -0 -0 +0 0 0 0 0 0 +1 0 0 0 10 0 +2 0 0 0 10 0 +3 0 0 0 10 0 +4 0 0 0 10 0 +5 0 0 0 10 0 +6 10 10 0 10 0 +7 10 10 0 10 0 +8 10 10 
0 10 0 +9 10 10 0 10 0 +-10 -10 -10 -10 -10 -10 +-9 -10 -10 -10 -0 -0 +-8 -10 -10 -10 -0 -0 +-7 -10 -10 -10 -0 -0 +-6 -10 -10 -10 -0 -0 +-5 -0 -0 -10 -0 -0 +-4 -0 -0 -10 -0 -0 +-3 -0 -0 -10 -0 -0 +-2 -0 -0 -10 -0 -0 +-1 -0 -0 -10 -0 -0 +0 0 0 0 0 0 +1 0 0 0 10 0 +2 0 0 0 10 0 +3 0 0 0 10 0 +4 0 0 0 10 0 +5 0 0 0 10 0 +6 10 10 0 10 0 +7 10 10 0 10 0 +8 10 10 0 10 0 +9 10 10 0 10 0 +0 0 0 0 0 0 +1 0 0 0 100 0 +2 0 0 0 100 0 +3 0 0 0 100 0 +4 0 0 0 100 0 +5 0 0 0 100 0 +6 0 0 0 100 0 +7 0 0 0 100 0 +8 0 0 0 100 0 +9 0 0 0 100 0 +10 0 0 0 100 0 +11 0 0 0 100 0 +12 0 0 0 100 0 +13 0 0 0 100 0 +14 0 0 0 100 0 +15 0 0 0 100 0 +16 0 0 0 100 0 +17 0 0 0 100 0 +18 0 0 0 100 0 +19 0 0 0 100 0 +0 0 0 0 0 0 +1 0 0 0 100 0 +2 0 0 0 100 0 +3 0 0 0 100 0 +4 0 0 0 100 0 +5 0 0 0 100 0 +6 0 0 0 100 0 +7 0 0 0 100 0 +8 0 0 0 100 0 +9 0 0 0 100 0 +10 0 0 0 100 0 +11 0 0 0 100 0 +12 0 0 0 100 0 +13 0 0 0 100 0 +14 0 0 0 100 0 +15 0 0 0 100 0 +16 0 0 0 100 0 +17 0 0 0 100 0 +18 0 0 0 100 0 +19 0 0 0 100 0 +0 0 0 0 0 0 +1 0 0 0 100 0 +2 0 0 0 100 0 +3 0 0 0 100 0 +4 0 0 0 100 0 +5 0 0 0 100 0 +6 0 0 0 100 0 +7 0 0 0 100 0 +8 0 0 0 100 0 +9 0 0 0 100 0 +10 0 0 0 100 0 +11 0 0 0 100 0 +12 0 0 0 100 0 +13 0 0 0 100 0 +14 0 0 0 100 0 +15 0 0 0 100 0 +16 0 0 0 100 0 +17 0 0 0 100 0 +18 0 0 0 100 0 +19 0 0 0 100 0 +0 0 0 0 0 0 +1 0 0 0 100 0 +2 0 0 0 100 0 +3 0 0 0 100 0 +4 0 0 0 100 0 +5 0 0 0 100 0 +6 0 0 0 100 0 +7 0 0 0 100 0 +8 0 0 0 100 0 +9 0 0 0 100 0 +10 0 0 0 100 0 +11 0 0 0 100 0 +12 0 0 0 100 0 +13 0 0 0 100 0 +14 0 0 0 100 0 +15 0 0 0 100 0 +16 0 0 0 100 0 +17 0 0 0 100 0 +18 0 0 0 100 0 +19 0 0 0 100 0 +-10 0 0 -100 0 0 +-9 0 0 -100 0 0 +-8 0 0 -100 0 0 +-7 0 0 -100 0 0 +-6 0 0 -100 0 0 +-5 0 0 -100 0 0 +-4 0 0 -100 0 0 +-3 0 0 -100 0 0 +-2 0 0 -100 0 0 +-1 0 0 -100 0 0 +0 0 0 0 0 0 +1 0 0 0 100 0 +2 0 0 0 100 0 +3 0 0 0 100 0 +4 0 0 0 100 0 +5 0 0 0 100 0 +6 0 0 0 100 0 +7 0 0 0 100 0 +8 0 0 0 100 0 +9 0 0 0 100 0 +-10 0 0 -100 0 0 +-9 0 0 -100 0 0 +-8 0 0 -100 0 0 +-7 0 0 -100 
0 0 +-6 0 0 -100 0 0 +-5 0 0 -100 0 0 +-4 0 0 -100 0 0 +-3 0 0 -100 0 0 +-2 0 0 -100 0 0 +-1 0 0 -100 0 0 +0 0 0 0 0 0 +1 0 0 0 100 0 +2 0 0 0 100 0 +3 0 0 0 100 0 +4 0 0 0 100 0 +5 0 0 0 100 0 +6 0 0 0 100 0 +7 0 0 0 100 0 +8 0 0 0 100 0 +9 0 0 0 100 0 +-10 0 0 -100 0 0 +-9 0 0 -100 0 0 +-8 0 0 -100 0 0 +-7 0 0 -100 0 0 +-6 0 0 -100 0 0 +-5 0 0 -100 0 0 +-4 0 0 -100 0 0 +-3 0 0 -100 0 0 +-2 0 0 -100 0 0 +-1 0 0 -100 0 0 +0 0 0 0 0 0 +1 0 0 0 100 0 +2 0 0 0 100 0 +3 0 0 0 100 0 +4 0 0 0 100 0 +5 0 0 0 100 0 +6 0 0 0 100 0 +7 0 0 0 100 0 +8 0 0 0 100 0 +9 0 0 0 100 0 +-10 0 0 -100 0 0 +-9 0 0 -100 0 0 +-8 0 0 -100 0 0 +-7 0 0 -100 0 0 +-6 0 0 -100 0 0 +-5 0 0 -100 0 0 +-4 0 0 -100 0 0 +-3 0 0 -100 0 0 +-2 0 0 -100 0 0 +-1 0 0 -100 0 0 +0 0 0 0 0 0 +1 0 0 0 100 0 +2 0 0 0 100 0 +3 0 0 0 100 0 +4 0 0 0 100 0 +5 0 0 0 100 0 +6 0 0 0 100 0 +7 0 0 0 100 0 +8 0 0 0 100 0 +9 0 0 0 100 0 +-10 -0 -0 -100 -0 -0 +-9 -0 -0 -100 -0 -0 +-8 -0 -0 -100 -0 -0 +-7 -0 -0 -100 -0 -0 +-6 -0 -0 -100 -0 -0 +-5 -0 -0 -100 -0 -0 +-4 -0 -0 -100 -0 -0 +-3 -0 -0 -100 -0 -0 +-2 -0 -0 -100 -0 -0 +-1 -0 -0 -100 -0 -0 +0 0 0 0 0 0 +1 0 0 0 100 0 +2 0 0 0 100 0 +3 0 0 0 100 0 +4 0 0 0 100 0 +5 0 0 0 100 0 +6 0 0 0 100 0 +7 0 0 0 100 0 +8 0 0 0 100 0 +9 0 0 0 100 0 +-10 -0 -0 -100 -0 -0 +-9 -0 -0 -100 -0 -0 +-8 -0 -0 -100 -0 -0 +-7 -0 -0 -100 -0 -0 +-6 -0 -0 -100 -0 -0 +-5 -0 -0 -100 -0 -0 +-4 -0 -0 -100 -0 -0 +-3 -0 -0 -100 -0 -0 +-2 -0 -0 -100 -0 -0 +-1 -0 -0 -100 -0 -0 +0 0 0 0 0 0 +1 0 0 0 100 0 +2 0 0 0 100 0 +3 0 0 0 100 0 +4 0 0 0 100 0 +5 0 0 0 100 0 +6 0 0 0 100 0 +7 0 0 0 100 0 +8 0 0 0 100 0 +9 0 0 0 100 0 +CHECKPOINT1 +id u8 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale) +0 0 0 0 0 0 0 0 +1 1 0 1 1 1 1 1 +2 2 0 2 2 2 2 2 +3 3 0 3 3 3 3 3 +4 4 0 4 4 4 4 4 +5 5 0 5 5 5 5 5 +6 6 0 6 6 6 6 6 +7 7 0 7 7 7 7 7 +8 8 0 8 8 8 8 8 +9 9 0 9 9 9 9 9 +10 10 0 10 10 10 10 10 +11 11 0 11 11 11 11 11 +12 12 0 12 12 12 12 12 +13 13 0 13 13 13 13 13 +14 14 0 
14 14 14 14 14 +15 15 0 15 15 15 15 15 +16 16 0 16 16 16 16 16 +17 17 0 17 17 17 17 17 +18 18 0 18 18 18 18 18 +19 19 0 19 19 19 19 19 +20 10 0 10 10 10 10 10 +21 11 0 11 11 11 11 11 +22 12 0 12 12 12 12 12 +23 13 0 13 13 13 13 13 +24 14 0 14 14 14 14 14 +25 15 0 15 15 15 15 15 +26 16 0 16 16 16 16 16 +27 17 0 17 17 17 17 17 +28 18 0 18 18 18 18 18 +29 19 0 19 19 19 19 19 +30 20 0 20 20 20 20 20 +31 21 0 21 21 21 21 21 +32 22 0 22 22 22 22 22 +33 23 0 23 23 23 23 23 +34 24 0 24 24 24 24 24 +35 25 0 25 25 25 25 25 +36 26 0 26 26 26 26 26 +37 27 0 27 27 27 27 27 +38 28 0 28 28 28 28 28 +39 29 0 29 29 29 29 29 +40 0 -1 0 0 0 0 0 +41 1 -1 0 0 0 10 0 +42 2 -1 0 0 0 10 0 +43 3 -1 0 0 0 10 0 +44 4 -1 0 0 0 10 0 +45 5 -1 10 0 0 10 0 +46 6 -1 10 10 0 10 0 +47 7 -1 10 10 0 10 0 +48 8 -1 10 10 0 10 0 +49 9 -1 10 10 0 10 0 +50 10 -1 10 10 10 10 10 +51 11 -1 10 10 10 20 10 +52 12 -1 10 10 10 20 10 +53 13 -1 10 10 10 20 10 +54 14 -1 10 10 10 20 10 +55 15 -1 20 20 10 20 10 +56 16 -1 20 20 10 20 10 +57 17 -1 20 20 10 20 10 +58 18 -1 20 20 10 20 10 +59 19 -1 20 20 10 20 10 +60 10 -1 10 10 10 10 10 +61 11 -1 10 10 10 20 10 +62 12 -1 10 10 10 20 10 +63 13 -1 10 10 10 20 10 +64 14 -1 10 10 10 20 10 +65 15 -1 20 20 10 20 10 +66 16 -1 20 20 10 20 10 +67 17 -1 20 20 10 20 10 +68 18 -1 20 20 10 20 10 +69 19 -1 20 20 10 20 10 +70 20 -1 20 20 20 20 20 +71 21 -1 20 20 20 30 20 +72 22 -1 20 20 20 30 20 +73 23 -1 20 20 20 30 20 +74 24 -1 20 20 20 30 20 +75 25 -1 30 20 20 30 20 +76 26 -1 30 30 20 30 20 +77 27 -1 30 30 20 30 20 +78 28 -1 30 30 20 30 20 +79 29 -1 30 30 20 30 20 +80 0 -2 0 0 0 0 0 +81 1 -2 0 0 0 100 0 +82 2 -2 0 0 0 100 0 +83 3 -2 0 0 0 100 0 +84 4 -2 0 0 0 100 0 +85 5 -2 0 0 0 100 0 +86 6 -2 0 0 0 100 0 +87 7 -2 0 0 0 100 0 +88 8 -2 0 0 0 100 0 +89 9 -2 0 0 0 100 0 +90 10 -2 0 0 0 100 0 +91 11 -2 0 0 0 100 0 +92 12 -2 0 0 0 100 0 +93 13 -2 0 0 0 100 0 +94 14 -2 0 0 0 100 0 +95 15 -2 0 0 0 100 0 +96 16 -2 0 0 0 100 0 +97 17 -2 0 0 0 100 0 +98 18 -2 0 0 0 100 0 +99 19 -2 0 0 0 100 
0 +100 10 -2 0 0 0 100 0 +101 11 -2 0 0 0 100 0 +102 12 -2 0 0 0 100 0 +103 13 -2 0 0 0 100 0 +104 14 -2 0 0 0 100 0 +105 15 -2 0 0 0 100 0 +106 16 -2 0 0 0 100 0 +107 17 -2 0 0 0 100 0 +108 18 -2 0 0 0 100 0 +109 19 -2 0 0 0 100 0 +110 20 -2 0 0 0 100 0 +111 21 -2 0 0 0 100 0 +112 22 -2 0 0 0 100 0 +113 23 -2 0 0 0 100 0 +114 24 -2 0 0 0 100 0 +115 25 -2 0 0 0 100 0 +116 26 -2 0 0 0 100 0 +117 27 -2 0 0 0 100 0 +118 28 -2 0 0 0 100 0 +119 29 -2 0 0 0 100 0 +200 0 0 0 0 0 0 0 +201 0 -1 0 0 0 0 0 +202 0 -2 0 0 0 0 0 +203 0 -3 0 0 0 0 0 +204 0 -4 0 0 0 0 0 +205 0 -5 0 0 0 0 0 +206 0 -6 0 0 0 0 0 +207 0 -7 0 0 0 0 0 +208 0 -8 0 0 0 0 0 +209 0 -9 0 0 0 0 0 +210 0 0 0 0 0 0 0 +211 0 -1 0 0 0 0 0 +212 0 -2 0 0 0 0 0 +213 0 -3 0 0 0 0 0 +214 0 -4 0 0 0 0 0 +215 0 -5 0 0 0 0 0 +216 0 -6 0 0 0 0 0 +217 0 -7 0 0 0 0 0 +218 0 -8 0 0 0 0 0 +219 0 -9 0 0 0 0 0 +300 2 4 2 2 2 2 2 +301 20 4 20 20 20 20 20 +302 200 4 200 200 200 200 200 +303 5 4 5 5 5 5 5 +304 50 4 50 50 50 50 50 +305 244 4 244 244 244 244 244 +id u16 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale) +0 0 0 0 0 0 0 0 +1 1 0 1 1 1 1 1 +2 2 0 2 2 2 2 2 +3 3 0 3 3 3 3 3 +4 4 0 4 4 4 4 4 +5 5 0 5 5 5 5 5 +6 6 0 6 6 6 6 6 +7 7 0 7 7 7 7 7 +8 8 0 8 8 8 8 8 +9 9 0 9 9 9 9 9 +10 10 0 10 10 10 10 10 +11 11 0 11 11 11 11 11 +12 12 0 12 12 12 12 12 +13 13 0 13 13 13 13 13 +14 14 0 14 14 14 14 14 +15 15 0 15 15 15 15 15 +16 16 0 16 16 16 16 16 +17 17 0 17 17 17 17 17 +18 18 0 18 18 18 18 18 +19 19 0 19 19 19 19 19 +20 10 0 10 10 10 10 10 +21 11 0 11 11 11 11 11 +22 12 0 12 12 12 12 12 +23 13 0 13 13 13 13 13 +24 14 0 14 14 14 14 14 +25 15 0 15 15 15 15 15 +26 16 0 16 16 16 16 16 +27 17 0 17 17 17 17 17 +28 18 0 18 18 18 18 18 +29 19 0 19 19 19 19 19 +30 20 0 20 20 20 20 20 +31 21 0 21 21 21 21 21 +32 22 0 22 22 22 22 22 +33 23 0 23 23 23 23 23 +34 24 0 24 24 24 24 24 +35 25 0 25 25 25 25 25 +36 26 0 26 26 26 26 26 +37 27 0 27 27 27 27 27 +38 28 0 28 28 28 28 28 +39 29 0 29 29 29 29 
29 +40 0 -1 0 0 0 0 0 +41 1 -1 0 0 0 10 0 +42 2 -1 0 0 0 10 0 +43 3 -1 0 0 0 10 0 +44 4 -1 0 0 0 10 0 +45 5 -1 10 0 0 10 0 +46 6 -1 10 10 0 10 0 +47 7 -1 10 10 0 10 0 +48 8 -1 10 10 0 10 0 +49 9 -1 10 10 0 10 0 +50 10 -1 10 10 10 10 10 +51 11 -1 10 10 10 20 10 +52 12 -1 10 10 10 20 10 +53 13 -1 10 10 10 20 10 +54 14 -1 10 10 10 20 10 +55 15 -1 20 20 10 20 10 +56 16 -1 20 20 10 20 10 +57 17 -1 20 20 10 20 10 +58 18 -1 20 20 10 20 10 +59 19 -1 20 20 10 20 10 +60 10 -1 10 10 10 10 10 +61 11 -1 10 10 10 20 10 +62 12 -1 10 10 10 20 10 +63 13 -1 10 10 10 20 10 +64 14 -1 10 10 10 20 10 +65 15 -1 20 20 10 20 10 +66 16 -1 20 20 10 20 10 +67 17 -1 20 20 10 20 10 +68 18 -1 20 20 10 20 10 +69 19 -1 20 20 10 20 10 +70 20 -1 20 20 20 20 20 +71 21 -1 20 20 20 30 20 +72 22 -1 20 20 20 30 20 +73 23 -1 20 20 20 30 20 +74 24 -1 20 20 20 30 20 +75 25 -1 30 20 20 30 20 +76 26 -1 30 30 20 30 20 +77 27 -1 30 30 20 30 20 +78 28 -1 30 30 20 30 20 +79 29 -1 30 30 20 30 20 +80 0 -2 0 0 0 0 0 +81 1 -2 0 0 0 100 0 +82 2 -2 0 0 0 100 0 +83 3 -2 0 0 0 100 0 +84 4 -2 0 0 0 100 0 +85 5 -2 0 0 0 100 0 +86 6 -2 0 0 0 100 0 +87 7 -2 0 0 0 100 0 +88 8 -2 0 0 0 100 0 +89 9 -2 0 0 0 100 0 +90 10 -2 0 0 0 100 0 +91 11 -2 0 0 0 100 0 +92 12 -2 0 0 0 100 0 +93 13 -2 0 0 0 100 0 +94 14 -2 0 0 0 100 0 +95 15 -2 0 0 0 100 0 +96 16 -2 0 0 0 100 0 +97 17 -2 0 0 0 100 0 +98 18 -2 0 0 0 100 0 +99 19 -2 0 0 0 100 0 +100 10 -2 0 0 0 100 0 +101 11 -2 0 0 0 100 0 +102 12 -2 0 0 0 100 0 +103 13 -2 0 0 0 100 0 +104 14 -2 0 0 0 100 0 +105 15 -2 0 0 0 100 0 +106 16 -2 0 0 0 100 0 +107 17 -2 0 0 0 100 0 +108 18 -2 0 0 0 100 0 +109 19 -2 0 0 0 100 0 +110 20 -2 0 0 0 100 0 +111 21 -2 0 0 0 100 0 +112 22 -2 0 0 0 100 0 +113 23 -2 0 0 0 100 0 +114 24 -2 0 0 0 100 0 +115 25 -2 0 0 0 100 0 +116 26 -2 0 0 0 100 0 +117 27 -2 0 0 0 100 0 +118 28 -2 0 0 0 100 0 +119 29 -2 0 0 0 100 0 +200 0 0 0 0 0 0 0 +201 0 -1 0 0 0 0 0 +202 0 -2 0 0 0 0 0 +203 0 -3 0 0 0 0 0 +204 0 -4 0 0 0 0 0 +205 0 -5 0 0 0 0 0 +206 0 -6 0 0 0 0 0 +207 0 -7 0 
0 0 0 0 +208 0 -8 0 0 0 0 0 +209 0 -9 0 0 0 0 0 +210 0 0 0 0 0 0 0 +211 0 -1 0 0 0 0 0 +212 0 -2 0 0 0 0 0 +213 0 -3 0 0 0 0 0 +214 0 -4 0 0 0 0 0 +215 0 -5 0 0 0 0 0 +216 0 -6 0 0 0 0 0 +217 0 -7 0 0 0 0 0 +218 0 -8 0 0 0 0 0 +219 0 -9 0 0 0 0 0 +300 2 4 2 2 2 2 2 +301 20 4 20 20 20 20 20 +302 200 4 200 200 200 200 200 +303 5 4 5 5 5 5 5 +304 50 4 50 50 50 50 50 +305 500 4 500 500 500 500 500 +id u32 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale) +0 0 0 0 0 0 0 0 +1 1 0 1 1 1 1 1 +2 2 0 2 2 2 2 2 +3 3 0 3 3 3 3 3 +4 4 0 4 4 4 4 4 +5 5 0 5 5 5 5 5 +6 6 0 6 6 6 6 6 +7 7 0 7 7 7 7 7 +8 8 0 8 8 8 8 8 +9 9 0 9 9 9 9 9 +10 10 0 10 10 10 10 10 +11 11 0 11 11 11 11 11 +12 12 0 12 12 12 12 12 +13 13 0 13 13 13 13 13 +14 14 0 14 14 14 14 14 +15 15 0 15 15 15 15 15 +16 16 0 16 16 16 16 16 +17 17 0 17 17 17 17 17 +18 18 0 18 18 18 18 18 +19 19 0 19 19 19 19 19 +20 10 0 10 10 10 10 10 +21 11 0 11 11 11 11 11 +22 12 0 12 12 12 12 12 +23 13 0 13 13 13 13 13 +24 14 0 14 14 14 14 14 +25 15 0 15 15 15 15 15 +26 16 0 16 16 16 16 16 +27 17 0 17 17 17 17 17 +28 18 0 18 18 18 18 18 +29 19 0 19 19 19 19 19 +30 20 0 20 20 20 20 20 +31 21 0 21 21 21 21 21 +32 22 0 22 22 22 22 22 +33 23 0 23 23 23 23 23 +34 24 0 24 24 24 24 24 +35 25 0 25 25 25 25 25 +36 26 0 26 26 26 26 26 +37 27 0 27 27 27 27 27 +38 28 0 28 28 28 28 28 +39 29 0 29 29 29 29 29 +40 0 -1 0 0 0 0 0 +41 1 -1 0 0 0 10 0 +42 2 -1 0 0 0 10 0 +43 3 -1 0 0 0 10 0 +44 4 -1 0 0 0 10 0 +45 5 -1 10 0 0 10 0 +46 6 -1 10 10 0 10 0 +47 7 -1 10 10 0 10 0 +48 8 -1 10 10 0 10 0 +49 9 -1 10 10 0 10 0 +50 10 -1 10 10 10 10 10 +51 11 -1 10 10 10 20 10 +52 12 -1 10 10 10 20 10 +53 13 -1 10 10 10 20 10 +54 14 -1 10 10 10 20 10 +55 15 -1 20 20 10 20 10 +56 16 -1 20 20 10 20 10 +57 17 -1 20 20 10 20 10 +58 18 -1 20 20 10 20 10 +59 19 -1 20 20 10 20 10 +60 10 -1 10 10 10 10 10 +61 11 -1 10 10 10 20 10 +62 12 -1 10 10 10 20 10 +63 13 -1 10 10 10 20 10 +64 14 -1 10 10 10 20 10 +65 15 -1 20 20 10 20 10 +66 
16 -1 20 20 10 20 10 +67 17 -1 20 20 10 20 10 +68 18 -1 20 20 10 20 10 +69 19 -1 20 20 10 20 10 +70 20 -1 20 20 20 20 20 +71 21 -1 20 20 20 30 20 +72 22 -1 20 20 20 30 20 +73 23 -1 20 20 20 30 20 +74 24 -1 20 20 20 30 20 +75 25 -1 30 20 20 30 20 +76 26 -1 30 30 20 30 20 +77 27 -1 30 30 20 30 20 +78 28 -1 30 30 20 30 20 +79 29 -1 30 30 20 30 20 +80 0 -2 0 0 0 0 0 +81 1 -2 0 0 0 100 0 +82 2 -2 0 0 0 100 0 +83 3 -2 0 0 0 100 0 +84 4 -2 0 0 0 100 0 +85 5 -2 0 0 0 100 0 +86 6 -2 0 0 0 100 0 +87 7 -2 0 0 0 100 0 +88 8 -2 0 0 0 100 0 +89 9 -2 0 0 0 100 0 +90 10 -2 0 0 0 100 0 +91 11 -2 0 0 0 100 0 +92 12 -2 0 0 0 100 0 +93 13 -2 0 0 0 100 0 +94 14 -2 0 0 0 100 0 +95 15 -2 0 0 0 100 0 +96 16 -2 0 0 0 100 0 +97 17 -2 0 0 0 100 0 +98 18 -2 0 0 0 100 0 +99 19 -2 0 0 0 100 0 +100 10 -2 0 0 0 100 0 +101 11 -2 0 0 0 100 0 +102 12 -2 0 0 0 100 0 +103 13 -2 0 0 0 100 0 +104 14 -2 0 0 0 100 0 +105 15 -2 0 0 0 100 0 +106 16 -2 0 0 0 100 0 +107 17 -2 0 0 0 100 0 +108 18 -2 0 0 0 100 0 +109 19 -2 0 0 0 100 0 +110 20 -2 0 0 0 100 0 +111 21 -2 0 0 0 100 0 +112 22 -2 0 0 0 100 0 +113 23 -2 0 0 0 100 0 +114 24 -2 0 0 0 100 0 +115 25 -2 0 0 0 100 0 +116 26 -2 0 0 0 100 0 +117 27 -2 0 0 0 100 0 +118 28 -2 0 0 0 100 0 +119 29 -2 0 0 0 100 0 +200 0 0 0 0 0 0 0 +201 0 -1 0 0 0 0 0 +202 0 -2 0 0 0 0 0 +203 0 -3 0 0 0 0 0 +204 0 -4 0 0 0 0 0 +205 0 -5 0 0 0 0 0 +206 0 -6 0 0 0 0 0 +207 0 -7 0 0 0 0 0 +208 0 -8 0 0 0 0 0 +209 0 -9 0 0 0 0 0 +210 0 0 0 0 0 0 0 +211 0 -1 0 0 0 0 0 +212 0 -2 0 0 0 0 0 +213 0 -3 0 0 0 0 0 +214 0 -4 0 0 0 0 0 +215 0 -5 0 0 0 0 0 +216 0 -6 0 0 0 0 0 +217 0 -7 0 0 0 0 0 +218 0 -8 0 0 0 0 0 +219 0 -9 0 0 0 0 0 +300 2 4 2 2 2 2 2 +301 20 4 20 20 20 20 20 +302 200 4 200 200 200 200 200 +303 5 4 5 5 5 5 5 +304 50 4 50 50 50 50 50 +305 500 4 500 500 500 500 500 +id u64 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale) +0 0 0 0 0 0 0 0 +1 1 0 1 1 1 1 1 +2 2 0 2 2 2 2 2 +3 3 0 3 3 3 3 3 +4 4 0 4 4 4 4 4 +5 5 0 5 5 5 5 5 +6 6 0 6 6 6 6 
6 +7 7 0 7 7 7 7 7 +8 8 0 8 8 8 8 8 +9 9 0 9 9 9 9 9 +10 10 0 10 10 10 10 10 +11 11 0 11 11 11 11 11 +12 12 0 12 12 12 12 12 +13 13 0 13 13 13 13 13 +14 14 0 14 14 14 14 14 +15 15 0 15 15 15 15 15 +16 16 0 16 16 16 16 16 +17 17 0 17 17 17 17 17 +18 18 0 18 18 18 18 18 +19 19 0 19 19 19 19 19 +20 10 0 10 10 10 10 10 +21 11 0 11 11 11 11 11 +22 12 0 12 12 12 12 12 +23 13 0 13 13 13 13 13 +24 14 0 14 14 14 14 14 +25 15 0 15 15 15 15 15 +26 16 0 16 16 16 16 16 +27 17 0 17 17 17 17 17 +28 18 0 18 18 18 18 18 +29 19 0 19 19 19 19 19 +30 20 0 20 20 20 20 20 +31 21 0 21 21 21 21 21 +32 22 0 22 22 22 22 22 +33 23 0 23 23 23 23 23 +34 24 0 24 24 24 24 24 +35 25 0 25 25 25 25 25 +36 26 0 26 26 26 26 26 +37 27 0 27 27 27 27 27 +38 28 0 28 28 28 28 28 +39 29 0 29 29 29 29 29 +40 0 -1 0 0 0 0 0 +41 1 -1 0 0 0 10 0 +42 2 -1 0 0 0 10 0 +43 3 -1 0 0 0 10 0 +44 4 -1 0 0 0 10 0 +45 5 -1 10 0 0 10 0 +46 6 -1 10 10 0 10 0 +47 7 -1 10 10 0 10 0 +48 8 -1 10 10 0 10 0 +49 9 -1 10 10 0 10 0 +50 10 -1 10 10 10 10 10 +51 11 -1 10 10 10 20 10 +52 12 -1 10 10 10 20 10 +53 13 -1 10 10 10 20 10 +54 14 -1 10 10 10 20 10 +55 15 -1 20 20 10 20 10 +56 16 -1 20 20 10 20 10 +57 17 -1 20 20 10 20 10 +58 18 -1 20 20 10 20 10 +59 19 -1 20 20 10 20 10 +60 10 -1 10 10 10 10 10 +61 11 -1 10 10 10 20 10 +62 12 -1 10 10 10 20 10 +63 13 -1 10 10 10 20 10 +64 14 -1 10 10 10 20 10 +65 15 -1 20 20 10 20 10 +66 16 -1 20 20 10 20 10 +67 17 -1 20 20 10 20 10 +68 18 -1 20 20 10 20 10 +69 19 -1 20 20 10 20 10 +70 20 -1 20 20 20 20 20 +71 21 -1 20 20 20 30 20 +72 22 -1 20 20 20 30 20 +73 23 -1 20 20 20 30 20 +74 24 -1 20 20 20 30 20 +75 25 -1 30 20 20 30 20 +76 26 -1 30 30 20 30 20 +77 27 -1 30 30 20 30 20 +78 28 -1 30 30 20 30 20 +79 29 -1 30 30 20 30 20 +80 0 -2 0 0 0 0 0 +81 1 -2 0 0 0 100 0 +82 2 -2 0 0 0 100 0 +83 3 -2 0 0 0 100 0 +84 4 -2 0 0 0 100 0 +85 5 -2 0 0 0 100 0 +86 6 -2 0 0 0 100 0 +87 7 -2 0 0 0 100 0 +88 8 -2 0 0 0 100 0 +89 9 -2 0 0 0 100 0 +90 10 -2 0 0 0 100 0 +91 11 -2 0 0 0 100 0 +92 12 -2 0 0 0 
100 0 +93 13 -2 0 0 0 100 0 +94 14 -2 0 0 0 100 0 +95 15 -2 0 0 0 100 0 +96 16 -2 0 0 0 100 0 +97 17 -2 0 0 0 100 0 +98 18 -2 0 0 0 100 0 +99 19 -2 0 0 0 100 0 +100 10 -2 0 0 0 100 0 +101 11 -2 0 0 0 100 0 +102 12 -2 0 0 0 100 0 +103 13 -2 0 0 0 100 0 +104 14 -2 0 0 0 100 0 +105 15 -2 0 0 0 100 0 +106 16 -2 0 0 0 100 0 +107 17 -2 0 0 0 100 0 +108 18 -2 0 0 0 100 0 +109 19 -2 0 0 0 100 0 +110 20 -2 0 0 0 100 0 +111 21 -2 0 0 0 100 0 +112 22 -2 0 0 0 100 0 +113 23 -2 0 0 0 100 0 +114 24 -2 0 0 0 100 0 +115 25 -2 0 0 0 100 0 +116 26 -2 0 0 0 100 0 +117 27 -2 0 0 0 100 0 +118 28 -2 0 0 0 100 0 +119 29 -2 0 0 0 100 0 +200 0 0 0 0 0 0 0 +201 0 -1 0 0 0 0 0 +202 0 -2 0 0 0 0 0 +203 0 -3 0 0 0 0 0 +204 0 -4 0 0 0 0 0 +205 0 -5 0 0 0 0 0 +206 0 -6 0 0 0 0 0 +207 0 -7 0 0 0 0 0 +208 0 -8 0 0 0 0 0 +209 0 -9 0 0 0 0 0 +210 0 0 0 0 0 0 0 +211 0 -1 0 0 0 0 0 +212 0 -2 0 0 0 0 0 +213 0 -3 0 0 0 0 0 +214 0 -4 0 0 0 0 0 +215 0 -5 0 0 0 0 0 +216 0 -6 0 0 0 0 0 +217 0 -7 0 0 0 0 0 +218 0 -8 0 0 0 0 0 +219 0 -9 0 0 0 0 0 +300 2 4 2 2 2 2 2 +301 20 4 20 20 20 20 20 +302 200 4 200 200 200 200 200 +303 5 4 5 5 5 5 5 +304 50 4 50 50 50 50 50 +305 500 4 500 500 500 500 500 +id i8 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale) +0 0 0 0 0 0 0 0 +1 1 0 1 1 1 1 1 +2 2 0 2 2 2 2 2 +3 3 0 3 3 3 3 3 +4 4 0 4 4 4 4 4 +5 5 0 5 5 5 5 5 +6 6 0 6 6 6 6 6 +7 7 0 7 7 7 7 7 +8 8 0 8 8 8 8 8 +9 9 0 9 9 9 9 9 +10 10 0 10 10 10 10 10 +11 11 0 11 11 11 11 11 +12 12 0 12 12 12 12 12 +13 13 0 13 13 13 13 13 +14 14 0 14 14 14 14 14 +15 15 0 15 15 15 15 15 +16 16 0 16 16 16 16 16 +17 17 0 17 17 17 17 17 +18 18 0 18 18 18 18 18 +19 19 0 19 19 19 19 19 +20 -10 0 -10 -10 -10 -10 -10 +21 -9 0 -9 -9 -9 -9 -9 +22 -8 0 -8 -8 -8 -8 -8 +23 -7 0 -7 -7 -7 -7 -7 +24 -6 0 -6 -6 -6 -6 -6 +25 -5 0 -5 -5 -5 -5 -5 +26 -4 0 -4 -4 -4 -4 -4 +27 -3 0 -3 -3 -3 -3 -3 +28 -2 0 -2 -2 -2 -2 -2 +29 -1 0 -1 -1 -1 -1 -1 +30 0 0 0 0 0 0 0 +31 1 0 1 1 1 1 1 +32 2 0 2 2 2 2 2 +33 3 0 3 3 3 3 3 +34 
4 0 4 4 4 4 4 +35 5 0 5 5 5 5 5 +36 6 0 6 6 6 6 6 +37 7 0 7 7 7 7 7 +38 8 0 8 8 8 8 8 +39 9 0 9 9 9 9 9 +40 0 -1 0 0 0 0 0 +41 1 -1 0 0 0 10 0 +42 2 -1 0 0 0 10 0 +43 3 -1 0 0 0 10 0 +44 4 -1 0 0 0 10 0 +45 5 -1 10 0 0 10 0 +46 6 -1 10 10 0 10 0 +47 7 -1 10 10 0 10 0 +48 8 -1 10 10 0 10 0 +49 9 -1 10 10 0 10 0 +50 10 -1 10 10 10 10 10 +51 11 -1 10 10 10 20 10 +52 12 -1 10 10 10 20 10 +53 13 -1 10 10 10 20 10 +54 14 -1 10 10 10 20 10 +55 15 -1 20 20 10 20 10 +56 16 -1 20 20 10 20 10 +57 17 -1 20 20 10 20 10 +58 18 -1 20 20 10 20 10 +59 19 -1 20 20 10 20 10 +60 -10 -1 -10 -10 -10 -10 -10 +61 -9 -1 -10 -10 -10 0 0 +62 -8 -1 -10 -10 -10 0 0 +63 -7 -1 -10 -10 -10 0 0 +64 -6 -1 -10 -10 -10 0 0 +65 -5 -1 -10 0 -10 0 0 +66 -4 -1 0 0 -10 0 0 +67 -3 -1 0 0 -10 0 0 +68 -2 -1 0 0 -10 0 0 +69 -1 -1 0 0 -10 0 0 +70 0 -1 0 0 0 0 0 +71 1 -1 0 0 0 10 0 +72 2 -1 0 0 0 10 0 +73 3 -1 0 0 0 10 0 +74 4 -1 0 0 0 10 0 +75 5 -1 10 0 0 10 0 +76 6 -1 10 10 0 10 0 +77 7 -1 10 10 0 10 0 +78 8 -1 10 10 0 10 0 +79 9 -1 10 10 0 10 0 +80 0 -2 0 0 0 0 0 +81 1 -2 0 0 0 100 0 +82 2 -2 0 0 0 100 0 +83 3 -2 0 0 0 100 0 +84 4 -2 0 0 0 100 0 +85 5 -2 0 0 0 100 0 +86 6 -2 0 0 0 100 0 +87 7 -2 0 0 0 100 0 +88 8 -2 0 0 0 100 0 +89 9 -2 0 0 0 100 0 +90 10 -2 0 0 0 100 0 +91 11 -2 0 0 0 100 0 +92 12 -2 0 0 0 100 0 +93 13 -2 0 0 0 100 0 +94 14 -2 0 0 0 100 0 +95 15 -2 0 0 0 100 0 +96 16 -2 0 0 0 100 0 +97 17 -2 0 0 0 100 0 +98 18 -2 0 0 0 100 0 +99 19 -2 0 0 0 100 0 +100 -10 -2 0 0 -100 0 0 +101 -9 -2 0 0 -100 0 0 +102 -8 -2 0 0 -100 0 0 +103 -7 -2 0 0 -100 0 0 +104 -6 -2 0 0 -100 0 0 +105 -5 -2 0 0 -100 0 0 +106 -4 -2 0 0 -100 0 0 +107 -3 -2 0 0 -100 0 0 +108 -2 -2 0 0 -100 0 0 +109 -1 -2 0 0 -100 0 0 +110 0 -2 0 0 0 0 0 +111 1 -2 0 0 0 100 0 +112 2 -2 0 0 0 100 0 +113 3 -2 0 0 0 100 0 +114 4 -2 0 0 0 100 0 +115 5 -2 0 0 0 100 0 +116 6 -2 0 0 0 100 0 +117 7 -2 0 0 0 100 0 +118 8 -2 0 0 0 100 0 +119 9 -2 0 0 0 100 0 +200 0 0 0 0 0 0 0 +201 0 -1 0 0 0 0 0 +202 0 -2 0 0 0 0 0 +203 0 -3 0 0 0 0 0 +204 0 -4 0 0 0 
0 0 +205 0 -5 0 0 0 0 0 +206 0 -6 0 0 0 0 0 +207 0 -7 0 0 0 0 0 +208 0 -8 0 0 0 0 0 +209 0 -9 0 0 0 0 0 +210 0 0 0 0 0 0 0 +211 0 -1 0 0 0 0 0 +212 0 -2 0 0 0 0 0 +213 0 -3 0 0 0 0 0 +214 0 -4 0 0 0 0 0 +215 0 -5 0 0 0 0 0 +216 0 -6 0 0 0 0 0 +217 0 -7 0 0 0 0 0 +218 0 -8 0 0 0 0 0 +219 0 -9 0 0 0 0 0 +300 2 4 2 2 2 2 2 +301 20 4 20 20 20 20 20 +302 -56 4 -56 -56 -56 -56 -56 +303 5 4 5 5 5 5 5 +304 50 4 50 50 50 50 50 +305 -12 4 -12 -12 -12 -12 -12 +id i16 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale) +0 0 0 0 0 0 0 0 +1 1 0 1 1 1 1 1 +2 2 0 2 2 2 2 2 +3 3 0 3 3 3 3 3 +4 4 0 4 4 4 4 4 +5 5 0 5 5 5 5 5 +6 6 0 6 6 6 6 6 +7 7 0 7 7 7 7 7 +8 8 0 8 8 8 8 8 +9 9 0 9 9 9 9 9 +10 10 0 10 10 10 10 10 +11 11 0 11 11 11 11 11 +12 12 0 12 12 12 12 12 +13 13 0 13 13 13 13 13 +14 14 0 14 14 14 14 14 +15 15 0 15 15 15 15 15 +16 16 0 16 16 16 16 16 +17 17 0 17 17 17 17 17 +18 18 0 18 18 18 18 18 +19 19 0 19 19 19 19 19 +20 -10 0 -10 -10 -10 -10 -10 +21 -9 0 -9 -9 -9 -9 -9 +22 -8 0 -8 -8 -8 -8 -8 +23 -7 0 -7 -7 -7 -7 -7 +24 -6 0 -6 -6 -6 -6 -6 +25 -5 0 -5 -5 -5 -5 -5 +26 -4 0 -4 -4 -4 -4 -4 +27 -3 0 -3 -3 -3 -3 -3 +28 -2 0 -2 -2 -2 -2 -2 +29 -1 0 -1 -1 -1 -1 -1 +30 0 0 0 0 0 0 0 +31 1 0 1 1 1 1 1 +32 2 0 2 2 2 2 2 +33 3 0 3 3 3 3 3 +34 4 0 4 4 4 4 4 +35 5 0 5 5 5 5 5 +36 6 0 6 6 6 6 6 +37 7 0 7 7 7 7 7 +38 8 0 8 8 8 8 8 +39 9 0 9 9 9 9 9 +40 0 -1 0 0 0 0 0 +41 1 -1 0 0 0 10 0 +42 2 -1 0 0 0 10 0 +43 3 -1 0 0 0 10 0 +44 4 -1 0 0 0 10 0 +45 5 -1 10 0 0 10 0 +46 6 -1 10 10 0 10 0 +47 7 -1 10 10 0 10 0 +48 8 -1 10 10 0 10 0 +49 9 -1 10 10 0 10 0 +50 10 -1 10 10 10 10 10 +51 11 -1 10 10 10 20 10 +52 12 -1 10 10 10 20 10 +53 13 -1 10 10 10 20 10 +54 14 -1 10 10 10 20 10 +55 15 -1 20 20 10 20 10 +56 16 -1 20 20 10 20 10 +57 17 -1 20 20 10 20 10 +58 18 -1 20 20 10 20 10 +59 19 -1 20 20 10 20 10 +60 -10 -1 -10 -10 -10 -10 -10 +61 -9 -1 -10 -10 -10 0 0 +62 -8 -1 -10 -10 -10 0 0 +63 -7 -1 -10 -10 -10 0 0 +64 -6 -1 -10 -10 -10 0 0 +65 -5 -1 -10 0 
-10 0 0 +66 -4 -1 0 0 -10 0 0 +67 -3 -1 0 0 -10 0 0 +68 -2 -1 0 0 -10 0 0 +69 -1 -1 0 0 -10 0 0 +70 0 -1 0 0 0 0 0 +71 1 -1 0 0 0 10 0 +72 2 -1 0 0 0 10 0 +73 3 -1 0 0 0 10 0 +74 4 -1 0 0 0 10 0 +75 5 -1 10 0 0 10 0 +76 6 -1 10 10 0 10 0 +77 7 -1 10 10 0 10 0 +78 8 -1 10 10 0 10 0 +79 9 -1 10 10 0 10 0 +80 0 -2 0 0 0 0 0 +81 1 -2 0 0 0 100 0 +82 2 -2 0 0 0 100 0 +83 3 -2 0 0 0 100 0 +84 4 -2 0 0 0 100 0 +85 5 -2 0 0 0 100 0 +86 6 -2 0 0 0 100 0 +87 7 -2 0 0 0 100 0 +88 8 -2 0 0 0 100 0 +89 9 -2 0 0 0 100 0 +90 10 -2 0 0 0 100 0 +91 11 -2 0 0 0 100 0 +92 12 -2 0 0 0 100 0 +93 13 -2 0 0 0 100 0 +94 14 -2 0 0 0 100 0 +95 15 -2 0 0 0 100 0 +96 16 -2 0 0 0 100 0 +97 17 -2 0 0 0 100 0 +98 18 -2 0 0 0 100 0 +99 19 -2 0 0 0 100 0 +100 -10 -2 0 0 -100 0 0 +101 -9 -2 0 0 -100 0 0 +102 -8 -2 0 0 -100 0 0 +103 -7 -2 0 0 -100 0 0 +104 -6 -2 0 0 -100 0 0 +105 -5 -2 0 0 -100 0 0 +106 -4 -2 0 0 -100 0 0 +107 -3 -2 0 0 -100 0 0 +108 -2 -2 0 0 -100 0 0 +109 -1 -2 0 0 -100 0 0 +110 0 -2 0 0 0 0 0 +111 1 -2 0 0 0 100 0 +112 2 -2 0 0 0 100 0 +113 3 -2 0 0 0 100 0 +114 4 -2 0 0 0 100 0 +115 5 -2 0 0 0 100 0 +116 6 -2 0 0 0 100 0 +117 7 -2 0 0 0 100 0 +118 8 -2 0 0 0 100 0 +119 9 -2 0 0 0 100 0 +200 0 0 0 0 0 0 0 +201 0 -1 0 0 0 0 0 +202 0 -2 0 0 0 0 0 +203 0 -3 0 0 0 0 0 +204 0 -4 0 0 0 0 0 +205 0 -5 0 0 0 0 0 +206 0 -6 0 0 0 0 0 +207 0 -7 0 0 0 0 0 +208 0 -8 0 0 0 0 0 +209 0 -9 0 0 0 0 0 +210 0 0 0 0 0 0 0 +211 0 -1 0 0 0 0 0 +212 0 -2 0 0 0 0 0 +213 0 -3 0 0 0 0 0 +214 0 -4 0 0 0 0 0 +215 0 -5 0 0 0 0 0 +216 0 -6 0 0 0 0 0 +217 0 -7 0 0 0 0 0 +218 0 -8 0 0 0 0 0 +219 0 -9 0 0 0 0 0 +300 2 4 2 2 2 2 2 +301 20 4 20 20 20 20 20 +302 200 4 200 200 200 200 200 +303 5 4 5 5 5 5 5 +304 50 4 50 50 50 50 50 +305 500 4 500 500 500 500 500 +id i32 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale) +0 0 0 0 0 0 0 0 +1 1 0 1 1 1 1 1 +2 2 0 2 2 2 2 2 +3 3 0 3 3 3 3 3 +4 4 0 4 4 4 4 4 +5 5 0 5 5 5 5 5 +6 6 0 6 6 6 6 6 +7 7 0 7 7 7 7 7 +8 8 0 8 8 8 8 8 +9 9 0 
9 9 9 9 9 +10 10 0 10 10 10 10 10 +11 11 0 11 11 11 11 11 +12 12 0 12 12 12 12 12 +13 13 0 13 13 13 13 13 +14 14 0 14 14 14 14 14 +15 15 0 15 15 15 15 15 +16 16 0 16 16 16 16 16 +17 17 0 17 17 17 17 17 +18 18 0 18 18 18 18 18 +19 19 0 19 19 19 19 19 +20 -10 0 -10 -10 -10 -10 -10 +21 -9 0 -9 -9 -9 -9 -9 +22 -8 0 -8 -8 -8 -8 -8 +23 -7 0 -7 -7 -7 -7 -7 +24 -6 0 -6 -6 -6 -6 -6 +25 -5 0 -5 -5 -5 -5 -5 +26 -4 0 -4 -4 -4 -4 -4 +27 -3 0 -3 -3 -3 -3 -3 +28 -2 0 -2 -2 -2 -2 -2 +29 -1 0 -1 -1 -1 -1 -1 +30 0 0 0 0 0 0 0 +31 1 0 1 1 1 1 1 +32 2 0 2 2 2 2 2 +33 3 0 3 3 3 3 3 +34 4 0 4 4 4 4 4 +35 5 0 5 5 5 5 5 +36 6 0 6 6 6 6 6 +37 7 0 7 7 7 7 7 +38 8 0 8 8 8 8 8 +39 9 0 9 9 9 9 9 +40 0 -1 0 0 0 0 0 +41 1 -1 0 0 0 10 0 +42 2 -1 0 0 0 10 0 +43 3 -1 0 0 0 10 0 +44 4 -1 0 0 0 10 0 +45 5 -1 10 0 0 10 0 +46 6 -1 10 10 0 10 0 +47 7 -1 10 10 0 10 0 +48 8 -1 10 10 0 10 0 +49 9 -1 10 10 0 10 0 +50 10 -1 10 10 10 10 10 +51 11 -1 10 10 10 20 10 +52 12 -1 10 10 10 20 10 +53 13 -1 10 10 10 20 10 +54 14 -1 10 10 10 20 10 +55 15 -1 20 20 10 20 10 +56 16 -1 20 20 10 20 10 +57 17 -1 20 20 10 20 10 +58 18 -1 20 20 10 20 10 +59 19 -1 20 20 10 20 10 +60 -10 -1 -10 -10 -10 -10 -10 +61 -9 -1 -10 -10 -10 0 0 +62 -8 -1 -10 -10 -10 0 0 +63 -7 -1 -10 -10 -10 0 0 +64 -6 -1 -10 -10 -10 0 0 +65 -5 -1 -10 0 -10 0 0 +66 -4 -1 0 0 -10 0 0 +67 -3 -1 0 0 -10 0 0 +68 -2 -1 0 0 -10 0 0 +69 -1 -1 0 0 -10 0 0 +70 0 -1 0 0 0 0 0 +71 1 -1 0 0 0 10 0 +72 2 -1 0 0 0 10 0 +73 3 -1 0 0 0 10 0 +74 4 -1 0 0 0 10 0 +75 5 -1 10 0 0 10 0 +76 6 -1 10 10 0 10 0 +77 7 -1 10 10 0 10 0 +78 8 -1 10 10 0 10 0 +79 9 -1 10 10 0 10 0 +80 0 -2 0 0 0 0 0 +81 1 -2 0 0 0 100 0 +82 2 -2 0 0 0 100 0 +83 3 -2 0 0 0 100 0 +84 4 -2 0 0 0 100 0 +85 5 -2 0 0 0 100 0 +86 6 -2 0 0 0 100 0 +87 7 -2 0 0 0 100 0 +88 8 -2 0 0 0 100 0 +89 9 -2 0 0 0 100 0 +90 10 -2 0 0 0 100 0 +91 11 -2 0 0 0 100 0 +92 12 -2 0 0 0 100 0 +93 13 -2 0 0 0 100 0 +94 14 -2 0 0 0 100 0 +95 15 -2 0 0 0 100 0 +96 16 -2 0 0 0 100 0 +97 17 -2 0 0 0 100 0 +98 18 -2 0 0 0 100 0 +99 
19 -2 0 0 0 100 0 +100 -10 -2 0 0 -100 0 0 +101 -9 -2 0 0 -100 0 0 +102 -8 -2 0 0 -100 0 0 +103 -7 -2 0 0 -100 0 0 +104 -6 -2 0 0 -100 0 0 +105 -5 -2 0 0 -100 0 0 +106 -4 -2 0 0 -100 0 0 +107 -3 -2 0 0 -100 0 0 +108 -2 -2 0 0 -100 0 0 +109 -1 -2 0 0 -100 0 0 +110 0 -2 0 0 0 0 0 +111 1 -2 0 0 0 100 0 +112 2 -2 0 0 0 100 0 +113 3 -2 0 0 0 100 0 +114 4 -2 0 0 0 100 0 +115 5 -2 0 0 0 100 0 +116 6 -2 0 0 0 100 0 +117 7 -2 0 0 0 100 0 +118 8 -2 0 0 0 100 0 +119 9 -2 0 0 0 100 0 +200 0 0 0 0 0 0 0 +201 0 -1 0 0 0 0 0 +202 0 -2 0 0 0 0 0 +203 0 -3 0 0 0 0 0 +204 0 -4 0 0 0 0 0 +205 0 -5 0 0 0 0 0 +206 0 -6 0 0 0 0 0 +207 0 -7 0 0 0 0 0 +208 0 -8 0 0 0 0 0 +209 0 -9 0 0 0 0 0 +210 0 0 0 0 0 0 0 +211 0 -1 0 0 0 0 0 +212 0 -2 0 0 0 0 0 +213 0 -3 0 0 0 0 0 +214 0 -4 0 0 0 0 0 +215 0 -5 0 0 0 0 0 +216 0 -6 0 0 0 0 0 +217 0 -7 0 0 0 0 0 +218 0 -8 0 0 0 0 0 +219 0 -9 0 0 0 0 0 +300 2 4 2 2 2 2 2 +301 20 4 20 20 20 20 20 +302 200 4 200 200 200 200 200 +303 5 4 5 5 5 5 5 +304 50 4 50 50 50 50 50 +305 500 4 500 500 500 500 500 +id i64 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale) +0 0 0 0 0 0 0 0 +1 1 0 1 1 1 1 1 +2 2 0 2 2 2 2 2 +3 3 0 3 3 3 3 3 +4 4 0 4 4 4 4 4 +5 5 0 5 5 5 5 5 +6 6 0 6 6 6 6 6 +7 7 0 7 7 7 7 7 +8 8 0 8 8 8 8 8 +9 9 0 9 9 9 9 9 +10 10 0 10 10 10 10 10 +11 11 0 11 11 11 11 11 +12 12 0 12 12 12 12 12 +13 13 0 13 13 13 13 13 +14 14 0 14 14 14 14 14 +15 15 0 15 15 15 15 15 +16 16 0 16 16 16 16 16 +17 17 0 17 17 17 17 17 +18 18 0 18 18 18 18 18 +19 19 0 19 19 19 19 19 +20 -10 0 -10 -10 -10 -10 -10 +21 -9 0 -9 -9 -9 -9 -9 +22 -8 0 -8 -8 -8 -8 -8 +23 -7 0 -7 -7 -7 -7 -7 +24 -6 0 -6 -6 -6 -6 -6 +25 -5 0 -5 -5 -5 -5 -5 +26 -4 0 -4 -4 -4 -4 -4 +27 -3 0 -3 -3 -3 -3 -3 +28 -2 0 -2 -2 -2 -2 -2 +29 -1 0 -1 -1 -1 -1 -1 +30 0 0 0 0 0 0 0 +31 1 0 1 1 1 1 1 +32 2 0 2 2 2 2 2 +33 3 0 3 3 3 3 3 +34 4 0 4 4 4 4 4 +35 5 0 5 5 5 5 5 +36 6 0 6 6 6 6 6 +37 7 0 7 7 7 7 7 +38 8 0 8 8 8 8 8 +39 9 0 9 9 9 9 9 +40 0 -1 0 0 0 0 0 +41 1 -1 0 0 0 10 
0 +42 2 -1 0 0 0 10 0 +43 3 -1 0 0 0 10 0 +44 4 -1 0 0 0 10 0 +45 5 -1 10 0 0 10 0 +46 6 -1 10 10 0 10 0 +47 7 -1 10 10 0 10 0 +48 8 -1 10 10 0 10 0 +49 9 -1 10 10 0 10 0 +50 10 -1 10 10 10 10 10 +51 11 -1 10 10 10 20 10 +52 12 -1 10 10 10 20 10 +53 13 -1 10 10 10 20 10 +54 14 -1 10 10 10 20 10 +55 15 -1 20 20 10 20 10 +56 16 -1 20 20 10 20 10 +57 17 -1 20 20 10 20 10 +58 18 -1 20 20 10 20 10 +59 19 -1 20 20 10 20 10 +60 -10 -1 -10 -10 -10 -10 -10 +61 -9 -1 -10 -10 -10 0 0 +62 -8 -1 -10 -10 -10 0 0 +63 -7 -1 -10 -10 -10 0 0 +64 -6 -1 -10 -10 -10 0 0 +65 -5 -1 -10 0 -10 0 0 +66 -4 -1 0 0 -10 0 0 +67 -3 -1 0 0 -10 0 0 +68 -2 -1 0 0 -10 0 0 +69 -1 -1 0 0 -10 0 0 +70 0 -1 0 0 0 0 0 +71 1 -1 0 0 0 10 0 +72 2 -1 0 0 0 10 0 +73 3 -1 0 0 0 10 0 +74 4 -1 0 0 0 10 0 +75 5 -1 10 0 0 10 0 +76 6 -1 10 10 0 10 0 +77 7 -1 10 10 0 10 0 +78 8 -1 10 10 0 10 0 +79 9 -1 10 10 0 10 0 +80 0 -2 0 0 0 0 0 +81 1 -2 0 0 0 100 0 +82 2 -2 0 0 0 100 0 +83 3 -2 0 0 0 100 0 +84 4 -2 0 0 0 100 0 +85 5 -2 0 0 0 100 0 +86 6 -2 0 0 0 100 0 +87 7 -2 0 0 0 100 0 +88 8 -2 0 0 0 100 0 +89 9 -2 0 0 0 100 0 +90 10 -2 0 0 0 100 0 +91 11 -2 0 0 0 100 0 +92 12 -2 0 0 0 100 0 +93 13 -2 0 0 0 100 0 +94 14 -2 0 0 0 100 0 +95 15 -2 0 0 0 100 0 +96 16 -2 0 0 0 100 0 +97 17 -2 0 0 0 100 0 +98 18 -2 0 0 0 100 0 +99 19 -2 0 0 0 100 0 +100 -10 -2 0 0 -100 0 0 +101 -9 -2 0 0 -100 0 0 +102 -8 -2 0 0 -100 0 0 +103 -7 -2 0 0 -100 0 0 +104 -6 -2 0 0 -100 0 0 +105 -5 -2 0 0 -100 0 0 +106 -4 -2 0 0 -100 0 0 +107 -3 -2 0 0 -100 0 0 +108 -2 -2 0 0 -100 0 0 +109 -1 -2 0 0 -100 0 0 +110 0 -2 0 0 0 0 0 +111 1 -2 0 0 0 100 0 +112 2 -2 0 0 0 100 0 +113 3 -2 0 0 0 100 0 +114 4 -2 0 0 0 100 0 +115 5 -2 0 0 0 100 0 +116 6 -2 0 0 0 100 0 +117 7 -2 0 0 0 100 0 +118 8 -2 0 0 0 100 0 +119 9 -2 0 0 0 100 0 +200 0 0 0 0 0 0 0 +201 0 -1 0 0 0 0 0 +202 0 -2 0 0 0 0 0 +203 0 -3 0 0 0 0 0 +204 0 -4 0 0 0 0 0 +205 0 -5 0 0 0 0 0 +206 0 -6 0 0 0 0 0 +207 0 -7 0 0 0 0 0 +208 0 -8 0 0 0 0 0 +209 0 -9 0 0 0 0 0 +210 0 0 0 0 0 0 0 +211 0 -1 0 0 0 0 
0 +212 0 -2 0 0 0 0 0 +213 0 -3 0 0 0 0 0 +214 0 -4 0 0 0 0 0 +215 0 -5 0 0 0 0 0 +216 0 -6 0 0 0 0 0 +217 0 -7 0 0 0 0 0 +218 0 -8 0 0 0 0 0 +219 0 -9 0 0 0 0 0 +300 2 4 2 2 2 2 2 +301 20 4 20 20 20 20 20 +302 200 4 200 200 200 200 200 +303 5 4 5 5 5 5 5 +304 50 4 50 50 50 50 50 +305 500 4 500 500 500 500 500 +id f32 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale) +0 0 0 0 0 0 0 0 +1 1 0 1 1 1 1 1 +2 2 0 2 2 2 2 2 +3 3 0 3 3 3 3 3 +4 4 0 4 4 4 4 4 +5 5 0 5 5 5 5 5 +6 6 0 6 6 6 6 6 +7 7 0 7 7 7 7 7 +8 8 0 8 8 8 8 8 +9 9 0 9 9 9 9 9 +10 10 0 10 10 10 10 10 +11 11 0 11 11 11 11 11 +12 12 0 12 12 12 12 12 +13 13 0 13 13 13 13 13 +14 14 0 14 14 14 14 14 +15 15 0 15 15 15 15 15 +16 16 0 16 16 16 16 16 +17 17 0 17 17 17 17 17 +18 18 0 18 18 18 18 18 +19 19 0 19 19 19 19 19 +20 -1 0 -1 -1 -1 -1 -1 +21 -0.9 0 -1 -1 -1 -0 -0 +22 -0.8 0 -1 -1 -1 -0 -0 +23 -0.7 0 -1 -1 -1 -0 -0 +24 -0.6 0 -1 -1 -1 -0 -0 +25 -0.5 0 -0 -0 -1 -0 -0 +26 -0.4 0 -0 -0 -1 -0 -0 +27 -0.3 0 -0 -0 -1 -0 -0 +28 -0.2 0 -0 -0 -1 -0 -0 +29 -0.1 0 -0 -0 -1 -0 -0 +30 0 0 0 0 0 0 0 +31 0.1 0 0 0 0 1 0 +32 0.2 0 0 0 0 1 0 +33 0.3 0 0 0 0 1 0 +34 0.4 0 0 0 0 1 0 +35 0.5 0 0 0 0 1 0 +36 0.6 0 1 1 0 1 0 +37 0.7 0 1 1 0 1 0 +38 0.8 0 1 1 0 1 0 +39 0.9 0 1 1 0 1 0 +40 0 -1 0 0 0 0 0 +41 1 -1 0 0 0 10 0 +42 2 -1 0 0 0 10 0 +43 3 -1 0 0 0 10 0 +44 4 -1 0 0 0 10 0 +45 5 -1 0 0 0 10 0 +46 6 -1 10 10 0 10 0 +47 7 -1 10 10 0 10 0 +48 8 -1 10 10 0 10 0 +49 9 -1 10 10 0 10 0 +50 10 -1 10 10 10 10 10 +51 11 -1 10 10 10 20 10 +52 12 -1 10 10 10 20 10 +53 13 -1 10 10 10 20 10 +54 14 -1 10 10 10 20 10 +55 15 -1 20 20 10 20 10 +56 16 -1 20 20 10 20 10 +57 17 -1 20 20 10 20 10 +58 18 -1 20 20 10 20 10 +59 19 -1 20 20 10 20 10 +60 -1 -1 -0 -0 -10 -0 -0 +61 -0.9 -1 -0 -0 -10 -0 -0 +62 -0.8 -1 -0 -0 -10 -0 -0 +63 -0.7 -1 -0 -0 -10 -0 -0 +64 -0.6 -1 -0 -0 -10 -0 -0 +65 -0.5 -1 -0 -0 -10 -0 -0 +66 -0.4 -1 -0 -0 -10 -0 -0 +67 -0.3 -1 -0 -0 -10 -0 -0 +68 -0.2 -1 -0 -0 -10 -0 -0 +69 -0.1 -1 
-0 -0 -10 -0 -0 +70 0 -1 0 0 0 0 0 +71 0.1 -1 0 0 0 10 0 +72 0.2 -1 0 0 0 10 0 +73 0.3 -1 0 0 0 10 0 +74 0.4 -1 0 0 0 10 0 +75 0.5 -1 0 0 0 10 0 +76 0.6 -1 0 0 0 10 0 +77 0.7 -1 0 0 0 10 0 +78 0.8 -1 0 0 0 10 0 +79 0.9 -1 0 0 0 10 0 +80 0 -2 0 0 0 0 0 +81 1 -2 0 0 0 100 0 +82 2 -2 0 0 0 100 0 +83 3 -2 0 0 0 100 0 +84 4 -2 0 0 0 100 0 +85 5 -2 0 0 0 100 0 +86 6 -2 0 0 0 100 0 +87 7 -2 0 0 0 100 0 +88 8 -2 0 0 0 100 0 +89 9 -2 0 0 0 100 0 +90 10 -2 0 0 0 100 0 +91 11 -2 0 0 0 100 0 +92 12 -2 0 0 0 100 0 +93 13 -2 0 0 0 100 0 +94 14 -2 0 0 0 100 0 +95 15 -2 0 0 0 100 0 +96 16 -2 0 0 0 100 0 +97 17 -2 0 0 0 100 0 +98 18 -2 0 0 0 100 0 +99 19 -2 0 0 0 100 0 +100 -1 -2 -0 -0 -100 -0 -0 +101 -0.9 -2 -0 -0 -100 -0 -0 +102 -0.8 -2 -0 -0 -100 -0 -0 +103 -0.7 -2 -0 -0 -100 -0 -0 +104 -0.6 -2 -0 -0 -100 -0 -0 +105 -0.5 -2 -0 -0 -100 -0 -0 +106 -0.4 -2 -0 -0 -100 -0 -0 +107 -0.3 -2 -0 -0 -100 -0 -0 +108 -0.2 -2 -0 -0 -100 -0 -0 +109 -0.1 -2 -0 -0 -100 -0 -0 +110 0 -2 0 0 0 0 0 +111 0.1 -2 0 0 0 100 0 +112 0.2 -2 0 0 0 100 0 +113 0.3 -2 0 0 0 100 0 +114 0.4 -2 0 0 0 100 0 +115 0.5 -2 0 0 0 100 0 +116 0.6 -2 0 0 0 100 0 +117 0.7 -2 0 0 0 100 0 +118 0.8 -2 0 0 0 100 0 +119 0.9 -2 0 0 0 100 0 +200 12345.679 0 12346 12346 12345 12346 12345 +201 12345.679 -1 12350 12350 12340 12350 12340 +202 12345.679 -2 12300 12300 12300 12400 12300 +203 12345.679 -3 12000 12000 12000 13000 12000 +204 12345.679 -4 10000 10000 10000 20000 10000 +205 12345.679 -5 0 0 0 100000 0 +206 12345.679 -6 0 0 0 1000000 0 +207 12345.679 -7 0 0 0 10000000 0 +208 12345.679 -8 0 0 0 100000000 0 +209 12345.679 -9 0 0 0 1000000000 0 +210 12345.679 0 12346 12346 12345 12346 12345 +211 12345.679 -1 12350 12350 12340 12350 12340 +212 12345.679 -2 12300 12300 12300 12400 12300 +213 12345.679 -3 12000 12000 12000 13000 12000 +214 12345.679 -4 10000 10000 10000 20000 10000 +215 12345.679 -5 0 0 0 100000 0 +216 12345.679 -6 0 0 0 1000000 0 +217 12345.679 -7 0 0 0 10000000 0 +218 12345.679 -8 0 0 0 100000000 0 +219 
12345.679 -9 0 0 0 1000000000 0 +300 2 4 2 2 2 2 2 +301 20 4 20 20 20 20 20 +302 200 4 200 200 200 200 200 +303 5 4 5 5 5 5 5 +304 50 4 50 50 50 50 50 +305 500 4 500 500 500 500 500 +id f64 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale) +0 0 0 0 0 0 0 0 +1 1 0 1 1 1 1 1 +2 2 0 2 2 2 2 2 +3 3 0 3 3 3 3 3 +4 4 0 4 4 4 4 4 +5 5 0 5 5 5 5 5 +6 6 0 6 6 6 6 6 +7 7 0 7 7 7 7 7 +8 8 0 8 8 8 8 8 +9 9 0 9 9 9 9 9 +10 10 0 10 10 10 10 10 +11 11 0 11 11 11 11 11 +12 12 0 12 12 12 12 12 +13 13 0 13 13 13 13 13 +14 14 0 14 14 14 14 14 +15 15 0 15 15 15 15 15 +16 16 0 16 16 16 16 16 +17 17 0 17 17 17 17 17 +18 18 0 18 18 18 18 18 +19 19 0 19 19 19 19 19 +20 -1 0 -1 -1 -1 -1 -1 +21 -0.9 0 -1 -1 -1 -0 -0 +22 -0.8 0 -1 -1 -1 -0 -0 +23 -0.7 0 -1 -1 -1 -0 -0 +24 -0.6 0 -1 -1 -1 -0 -0 +25 -0.5 0 -0 -0 -1 -0 -0 +26 -0.4 0 -0 -0 -1 -0 -0 +27 -0.3 0 -0 -0 -1 -0 -0 +28 -0.2 0 -0 -0 -1 -0 -0 +29 -0.1 0 -0 -0 -1 -0 -0 +30 0 0 0 0 0 0 0 +31 0.1 0 0 0 0 1 0 +32 0.2 0 0 0 0 1 0 +33 0.3 0 0 0 0 1 0 +34 0.4 0 0 0 0 1 0 +35 0.5 0 0 0 0 1 0 +36 0.6 0 1 1 0 1 0 +37 0.7 0 1 1 0 1 0 +38 0.8 0 1 1 0 1 0 +39 0.9 0 1 1 0 1 0 +40 0 -1 0 0 0 0 0 +41 1 -1 0 0 0 10 0 +42 2 -1 0 0 0 10 0 +43 3 -1 0 0 0 10 0 +44 4 -1 0 0 0 10 0 +45 5 -1 0 0 0 10 0 +46 6 -1 10 10 0 10 0 +47 7 -1 10 10 0 10 0 +48 8 -1 10 10 0 10 0 +49 9 -1 10 10 0 10 0 +50 10 -1 10 10 10 10 10 +51 11 -1 10 10 10 20 10 +52 12 -1 10 10 10 20 10 +53 13 -1 10 10 10 20 10 +54 14 -1 10 10 10 20 10 +55 15 -1 20 20 10 20 10 +56 16 -1 20 20 10 20 10 +57 17 -1 20 20 10 20 10 +58 18 -1 20 20 10 20 10 +59 19 -1 20 20 10 20 10 +60 -1 -1 -0 -0 -10 -0 -0 +61 -0.9 -1 -0 -0 -10 -0 -0 +62 -0.8 -1 -0 -0 -10 -0 -0 +63 -0.7 -1 -0 -0 -10 -0 -0 +64 -0.6 -1 -0 -0 -10 -0 -0 +65 -0.5 -1 -0 -0 -10 -0 -0 +66 -0.4 -1 -0 -0 -10 -0 -0 +67 -0.3 -1 -0 -0 -10 -0 -0 +68 -0.2 -1 -0 -0 -10 -0 -0 +69 -0.1 -1 -0 -0 -10 -0 -0 +70 0 -1 0 0 0 0 0 +71 0.1 -1 0 0 0 10 0 +72 0.2 -1 0 0 0 10 0 +73 0.3 -1 0 0 0 10 0 +74 0.4 -1 0 0 0 10 0 +75 0.5 
-1 0 0 0 10 0 +76 0.6 -1 0 0 0 10 0 +77 0.7 -1 0 0 0 10 0 +78 0.8 -1 0 0 0 10 0 +79 0.9 -1 0 0 0 10 0 +80 0 -2 0 0 0 0 0 +81 1 -2 0 0 0 100 0 +82 2 -2 0 0 0 100 0 +83 3 -2 0 0 0 100 0 +84 4 -2 0 0 0 100 0 +85 5 -2 0 0 0 100 0 +86 6 -2 0 0 0 100 0 +87 7 -2 0 0 0 100 0 +88 8 -2 0 0 0 100 0 +89 9 -2 0 0 0 100 0 +90 10 -2 0 0 0 100 0 +91 11 -2 0 0 0 100 0 +92 12 -2 0 0 0 100 0 +93 13 -2 0 0 0 100 0 +94 14 -2 0 0 0 100 0 +95 15 -2 0 0 0 100 0 +96 16 -2 0 0 0 100 0 +97 17 -2 0 0 0 100 0 +98 18 -2 0 0 0 100 0 +99 19 -2 0 0 0 100 0 +100 -1 -2 -0 -0 -100 -0 -0 +101 -0.9 -2 -0 -0 -100 -0 -0 +102 -0.8 -2 -0 -0 -100 -0 -0 +103 -0.7 -2 -0 -0 -100 -0 -0 +104 -0.6 -2 -0 -0 -100 -0 -0 +105 -0.5 -2 -0 -0 -100 -0 -0 +106 -0.4 -2 -0 -0 -100 -0 -0 +107 -0.3 -2 -0 -0 -100 -0 -0 +108 -0.2 -2 -0 -0 -100 -0 -0 +109 -0.1 -2 -0 -0 -100 -0 -0 +110 0 -2 0 0 0 0 0 +111 0.1 -2 0 0 0 100 0 +112 0.2 -2 0 0 0 100 0 +113 0.3 -2 0 0 0 100 0 +114 0.4 -2 0 0 0 100 0 +115 0.5 -2 0 0 0 100 0 +116 0.6 -2 0 0 0 100 0 +117 0.7 -2 0 0 0 100 0 +118 0.8 -2 0 0 0 100 0 +119 0.9 -2 0 0 0 100 0 +200 12345.6789 0 12346 12346 12345 12346 12345 +201 12345.6789 -1 12350 12350 12340 12350 12340 +202 12345.6789 -2 12300 12300 12300 12400 12300 +203 12345.6789 -3 12000 12000 12000 13000 12000 +204 12345.6789 -4 10000 10000 10000 20000 10000 +205 12345.6789 -5 0 0 0 100000 0 +206 12345.6789 -6 0 0 0 1000000 0 +207 12345.6789 -7 0 0 0 10000000 0 +208 12345.6789 -8 0 0 0 100000000 0 +209 12345.6789 -9 0 0 0 1000000000 0 +210 12345.6789 0 12346 12346 12345 12346 12345 +211 12345.6789 -1 12350 12350 12340 12350 12340 +212 12345.6789 -2 12300 12300 12300 12400 12300 +213 12345.6789 -3 12000 12000 12000 13000 12000 +214 12345.6789 -4 10000 10000 10000 20000 10000 +215 12345.6789 -5 0 0 0 100000 0 +216 12345.6789 -6 0 0 0 1000000 0 +217 12345.6789 -7 0 0 0 10000000 0 +218 12345.6789 -8 0 0 0 100000000 0 +219 12345.6789 -9 0 0 0 1000000000 0 +300 2 4 2 2 2 2 2 +301 20 4 20 20 20 20 20 +302 200 4 200 200 200 200 200 +303 5 4 5 5 
5 5 5 +304 50 4 50 50 50 50 50 +305 500 4 500 500 500 500 500 +CHECKPOINT2 +1 42.42 42.42 42.42 42.42 +2 0.0084 0.0084 0.0084 0.0084 +3 6.513 6.513 6.513 6.513 +4 115.6011 115.6011 115.6011 115.6011 +5 1.6029 1.6029 1.6029 1.6029 +6 -0.9999 -0.9999 -0.9999 -0.9999 +7 0.0084 0.0084 0.0084 0.0084 +8 3.7476 3.7476 3.7476 3.7476 +9 5.4066 5.4066 5.4066 5.4066 +10 1.6275 1.6275 1.6275 1.6275 +1 +1 +1 diff --git a/tests/queries/0_stateless/03165_round_scale_as_column.sql b/tests/queries/0_stateless/03165_round_scale_as_column.sql new file mode 100644 index 00000000000..229f705808d --- /dev/null +++ b/tests/queries/0_stateless/03165_round_scale_as_column.sql @@ -0,0 +1,123 @@ +-- Tests functions round(), roundBankers(), floor(), ceil() and trunc() with default 'scale' argument +SELECT toUInt8(number) AS x, round(x), roundBankers(x), floor(x), ceil(x), trunc(x) FROM system.numbers LIMIT 20; +SELECT toUInt16(number) AS x, round(x), roundBankers(x), floor(x), ceil(x), trunc(x) FROM system.numbers LIMIT 20; +SELECT toUInt32(number) AS x, round(x), roundBankers(x), floor(x), ceil(x), trunc(x) FROM system.numbers LIMIT 20; +SELECT toUInt64(number) AS x, round(x), roundBankers(x), floor(x), ceil(x), trunc(x) FROM system.numbers LIMIT 20; +SELECT toInt8(number - 10) AS x, round(x), roundBankers(x), floor(x), ceil(x), trunc(x) FROM system.numbers LIMIT 20; +SELECT toInt16(number - 10) AS x, round(x), roundBankers(x), floor(x), ceil(x), trunc(x) FROM system.numbers LIMIT 20; +SELECT toInt32(number - 10) AS x, round(x), roundBankers(x), floor(x), ceil(x), trunc(x) FROM system.numbers LIMIT 20; +SELECT toInt64(number - 10) AS x, round(x), roundBankers(x), floor(x), ceil(x), trunc(x) FROM system.numbers LIMIT 20; + +SELECT toFloat32(number - 10) AS x, round(x), roundBankers(x), floor(x), ceil(x), trunc(x) FROM system.numbers LIMIT 20; +SELECT toFloat64(number - 10) AS x, round(x), roundBankers(x), floor(x), ceil(x), trunc(x) FROM system.numbers LIMIT 20; +SELECT toFloat32((number - 
10) / 10) AS x, round(x), roundBankers(x), floor(x), ceil(x), trunc(x) FROM system.numbers LIMIT 20; +SELECT toFloat64((number - 10) / 10) AS x, round(x), roundBankers(x), floor(x), ceil(x), trunc(x) FROM system.numbers LIMIT 20; + +-- Functions round(), roundBankers(), floor(), ceil() and trunc() accept non-const 'scale' arguments +SELECT toFloat32((number - 10) / 10) AS x, round(x, materialize(1)), roundBankers(x, materialize(1)), floor(x, materialize(1)), ceil(x, materialize(1)), trunc(x, materialize(1)) FROM system.numbers LIMIT 20; +SELECT toFloat64((number - 10) / 10) AS x, round(x, materialize(1)), roundBankers(x, materialize(1)), floor(x, materialize(1)), ceil(x, materialize(1)), trunc(x, materialize(1)) FROM system.numbers LIMIT 20; +SELECT toUInt8(number) AS x, round(x, materialize(-1)), roundBankers(x, materialize(-1)), floor(x, materialize(-1)), ceil(x, materialize(-1)), trunc(x, materialize(-1)) FROM system.numbers LIMIT 20; +SELECT toUInt16(number) AS x, round(x, materialize(-1)), roundBankers(x, materialize(-1)), floor(x, materialize(-1)), ceil(x, materialize(-1)), trunc(x, materialize(-1)) FROM system.numbers LIMIT 20; +SELECT toUInt32(number) AS x, round(x, materialize(-1)), roundBankers(x, materialize(-1)), floor(x, materialize(-1)), ceil(x, materialize(-1)), trunc(x, materialize(-1)) FROM system.numbers LIMIT 20; +SELECT toUInt64(number) AS x, round(x, materialize(-1)), roundBankers(x, materialize(-1)), floor(x, materialize(-1)), ceil(x, materialize(-1)), trunc(x, materialize(-1)) FROM system.numbers LIMIT 20; + +SELECT toInt8(number - 10) AS x, round(x, materialize(-1)), roundBankers(x, materialize(-1)), floor(x, materialize(-1)), ceil(x, materialize(-1)), trunc(x, materialize(-1)) FROM system.numbers LIMIT 20; +SELECT toInt16(number - 10) AS x, round(x, materialize(-1)), roundBankers(x, materialize(-1)), floor(x, materialize(-1)), ceil(x, materialize(-1)), trunc(x, materialize(-1)) FROM system.numbers LIMIT 20; +SELECT toInt32(number - 10) AS 
x, round(x, materialize(-1)), roundBankers(x, materialize(-1)), floor(x, materialize(-1)), ceil(x, materialize(-1)), trunc(x, materialize(-1)) FROM system.numbers LIMIT 20; +SELECT toInt64(number - 10) AS x, round(x, materialize(-1)), roundBankers(x, materialize(-1)), floor(x, materialize(-1)), ceil(x, materialize(-1)), trunc(x, materialize(-1)) FROM system.numbers LIMIT 20; +SELECT toFloat32(number - 10) AS x, round(x, materialize(-1)), roundBankers(x, materialize(-1)), floor(x, materialize(-1)), ceil(x, materialize(-1)), trunc(x, materialize(-1)) FROM system.numbers LIMIT 20; +SELECT toFloat64(number - 10) AS x, round(x, materialize(-1)), roundBankers(x, materialize(-1)), floor(x, materialize(-1)), ceil(x, materialize(-1)), trunc(x, materialize(-1)) FROM system.numbers LIMIT 20; + +SELECT toUInt8(number) AS x, round(x, materialize(-2)), roundBankers(x, materialize(-2)), floor(x, materialize(-2)), ceil(x, materialize(-2)), trunc(x, materialize(-2)) FROM system.numbers LIMIT 20; +SELECT toUInt16(number) AS x, round(x, materialize(-2)), roundBankers(x, materialize(-2)), floor(x, materialize(-2)), ceil(x, materialize(-2)), trunc(x, materialize(-2)) FROM system.numbers LIMIT 20; +SELECT toUInt32(number) AS x, round(x, materialize(-2)), roundBankers(x, materialize(-2)), floor(x, materialize(-2)), ceil(x, materialize(-2)), trunc(x, materialize(-2)) FROM system.numbers LIMIT 20; +SELECT toUInt64(number) AS x, round(x, materialize(-2)), roundBankers(x, materialize(-2)), floor(x, materialize(-2)), ceil(x, materialize(-2)), trunc(x, materialize(-2)) FROM system.numbers LIMIT 20; +SELECT toInt8(number - 10) AS x, round(x, materialize(-2)), roundBankers(x, materialize(-2)), floor(x, materialize(-2)), ceil(x, materialize(-2)), trunc(x, materialize(-2)) FROM system.numbers LIMIT 20; +SELECT toInt16(number - 10) AS x, round(x, materialize(-2)), roundBankers(x, materialize(-2)), floor(x, materialize(-2)), ceil(x, materialize(-2)), trunc(x, materialize(-2)) FROM system.numbers 
LIMIT 20; +SELECT toInt32(number - 10) AS x, round(x, materialize(-2)), roundBankers(x, materialize(-2)), floor(x, materialize(-2)), ceil(x, materialize(-2)), trunc(x, materialize(-2)) FROM system.numbers LIMIT 20; +SELECT toInt64(number - 10) AS x, round(x, materialize(-2)), roundBankers(x, materialize(-2)), floor(x, materialize(-2)), ceil(x, materialize(-2)), trunc(x, materialize(-2)) FROM system.numbers LIMIT 20; +SELECT toFloat32(number - 10) AS x, round(x, materialize(-2)), roundBankers(x, materialize(-2)), floor(x, materialize(-2)), ceil(x, materialize(-2)), trunc(x, materialize(-2)) FROM system.numbers LIMIT 20; +SELECT toFloat64(number - 10) AS x, round(x, materialize(-2)), roundBankers(x, materialize(-2)), floor(x, materialize(-2)), ceil(x, materialize(-2)), trunc(x, materialize(-2)) FROM system.numbers LIMIT 20; + +SELECT toString('CHECKPOINT1'); + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab ( + id Int32, + scale Int16, + u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, + i8 Int8, i16 Int16, i32 Int32, i64 Int64, + f32 Float32, f64 Float64 +) ENGINE = Memory; + +INSERT INTO tab SELECT number , 0, number, number, number, number, number, number, number, number, number, number, FROM system.numbers LIMIT 20; +INSERT INTO tab SELECT number+20 , 0, number+10, number+10, number+10, number+10, number-10, number-10, number-10, number-10, (toFloat32(number)-10)/10, (toFloat64(number)-10)/10, FROM system.numbers LIMIT 20; +INSERT INTO tab SELECT number+40 , -1, number, number, number, number, number, number, number, number, number, number, FROM system.numbers LIMIT 20; +INSERT INTO tab SELECT number+60 , -1, number+10, number+10, number+10, number+10, number-10, number-10, number-10, number-10, (toFloat32(number)-10)/10, (toFloat64(number)-10)/10, FROM system.numbers LIMIT 20; +INSERT INTO tab SELECT number+80 , -2, number, number, number, number, number, number, number, number, number, number, FROM system.numbers LIMIT 20; +INSERT INTO tab SELECT number+100, -2, 
number+10, number+10, number+10, number+10, number-10, number-10, number-10, number-10, (toFloat32(number)-10)/10, (toFloat64(number)-10)/10, FROM system.numbers LIMIT 20; + +INSERT INTO tab SELECT number+200, -number, 0, 0, 0, 0, 0, 0, 0, 0, 12345.6789, 12345.6789, FROM system.numbers LIMIT 10; +INSERT INTO tab SELECT number+210, -number, 0, 0, 0, 0, 0, 0, 0, 0, 12345.6789, 12345.6789, FROM system.numbers LIMIT 10; + +INSERT INTO tab VALUES (300, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2.0, 2.0); +INSERT INTO tab VALUES (301, 4, 20, 20, 20, 20, 20, 20, 20, 20, 20.0, 20.0); +INSERT INTO tab VALUES (302, 4, 200, 200, 200, 200, 200, 200, 200, 200, 200.0, 200.0); +INSERT INTO tab VALUES (303, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5.0, 5.0); +INSERT INTO tab VALUES (304, 4, 50, 50, 50, 50, 50, 50, 50, 50, 50.0, 50.0); +INSERT INTO tab VALUES (305, 4, 500, 500, 500, 500, 500, 500, 500, 500, 500.0, 500.0); + +SELECT toString('id u8 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale)'); +SELECT id, u8 AS x, scale, round(x, scale), roundBankers(x, scale), floor(x, scale), ceil(x, scale), trunc(x, scale) FROM tab ORDER BY id; +SELECT toString('id u16 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale)'); +SELECT id, u16 AS x, scale, round(x, scale), roundBankers(x, scale), floor(x, scale), ceil(x, scale), trunc(x, scale) FROM tab ORDER BY id; +SELECT toString('id u32 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale)'); +SELECT id, u32 AS x, scale, round(x, scale), roundBankers(x, scale), floor(x, scale), ceil(x, scale), trunc(x, scale) FROM tab ORDER BY id; +SELECT toString('id u64 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale)'); +SELECT id, u64 AS x, scale, round(x, scale), roundBankers(x, scale), floor(x, scale), ceil(x, scale), trunc(x, scale) FROM tab ORDER BY id; +SELECT toString('id i8 scale round(u8, scale) 
roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale)'); +SELECT id, i8 AS x, scale, round(x, scale), roundBankers(x, scale), floor(x, scale), ceil(x, scale), trunc(x, scale) FROM tab ORDER BY id; +SELECT toString('id i16 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale)'); +SELECT id, i16 AS x, scale, round(x, scale), roundBankers(x, scale), floor(x, scale), ceil(x, scale), trunc(x, scale) FROM tab ORDER BY id; +SELECT toString('id i32 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale)'); +SELECT id, i32 AS x, scale, round(x, scale), roundBankers(x, scale), floor(x, scale), ceil(x, scale), trunc(x, scale) FROM tab ORDER BY id; +SELECT toString('id i64 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale)'); +SELECT id, i64 AS x, scale, round(x, scale), roundBankers(x, scale), floor(x, scale), ceil(x, scale), trunc(x, scale) FROM tab ORDER BY id; +SELECT toString('id f32 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale)'); +SELECT id, f32 AS x, scale, round(x, scale), roundBankers(x, scale), floor(x, scale), ceil(x, scale), trunc(x, scale) FROM tab ORDER BY id; +SELECT toString('id f64 scale round(u8, scale) roundBankers(x, scale) floor(x, scale) ceil(x, scale) trunc(x, scale)'); +SELECT id, f64 AS x, scale, round(x, scale), roundBankers(x, scale), floor(x, scale), ceil(x, scale), trunc(x, scale) FROM tab ORDER BY id; + +DROP TABLE tab; +-- +SELECT toString('CHECKPOINT2'); + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab ( + id Int32, + scale Int16, + d32 Decimal32(4), d64 Decimal64(4), d128 Decimal128(4), d256 Decimal256(4) +) ENGINE = Memory; + +INSERT INTO tab VALUES (1, 6, toDecimal32('42.42', 4), toDecimal64('42.42', 4), toDecimal128('42.42', 4), toDecimal256('42.42', 4)); +INSERT INTO tab SELECT 2 , 6, cos(d32), cos(d64), cos(d128), cos(d256) FROM tab WHERE id = 1; +INSERT INTO 
tab SELECT 3 , 6, sqrt(d32), sqrt(d64), sqrt(d128), sqrt(d256) FROM tab WHERE id = 1; +INSERT INTO tab SELECT 4 , 6, lgamma(d32), lgamma(d64), lgamma(d128), lgamma(d256) FROM tab WHERE id = 1; +INSERT INTO tab SELECT 5 , 6, tgamma(d32)/1e50, tgamma(d64)/1e50, tgamma(d128)/1e50, tgamma(d256)/1e50 FROM tab WHERE id = 1; +INSERT INTO tab SELECT 6 , 8, sin(d32), sin(d64), sin(d128), sin(d256) FROM tab WHERE id = 1; +INSERT INTO tab SELECT 7 , 8, cos(d32), cos(d64), cos(d128), cos(d256) FROM tab WHERE id = 1; +INSERT INTO tab SELECT 8 , 8, log(d32), log(d64), log(d128), log(d256) FROM tab WHERE id = 1; +INSERT INTO tab SELECT 9 , 8, log2(d32), log2(d64), log2(d128), log2(d256) FROM tab WHERE id = 1; +INSERT INTO tab SELECT 10, 8, log10(d32), log10(d64), log10(d128), log10(d256) FROM tab WHERE id = 1; + +SELECT id, round(d32, scale), round(d64, scale), round(d128, scale), round(d256, scale) FROM tab ORDER BY id; + +DROP TABLE tab; + +SELECT round(1, 1); +SELECT round(materialize(1), materialize(1)); +SELECT round(1, materialize(1)); --{serverError ILLEGAL_COLUMN} +SELECT round(materialize(1), 1); +SELECT materialize(10.1) AS x, ceil(x, toUInt256(123)); --{serverError ILLEGAL_TYPE_OF_ARGUMENT} diff --git a/tests/queries/0_stateless/03165_storage_merge_view_prewhere.reference b/tests/queries/0_stateless/03165_storage_merge_view_prewhere.reference new file mode 100644 index 00000000000..4cd7f2cb141 --- /dev/null +++ b/tests/queries/0_stateless/03165_storage_merge_view_prewhere.reference @@ -0,0 +1,8 @@ +a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever +a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever +a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever +a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever +a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever +a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever +a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever +a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever diff --git a/tests/queries/0_stateless/03165_storage_merge_view_prewhere.sql 
b/tests/queries/0_stateless/03165_storage_merge_view_prewhere.sql new file mode 100644 index 00000000000..97651d1b0fd --- /dev/null +++ b/tests/queries/0_stateless/03165_storage_merge_view_prewhere.sql @@ -0,0 +1,41 @@ +-- Tags: distributed + +DROP TABLE IF EXISTS ids; +DROP TABLE IF EXISTS data; +DROP TABLE IF EXISTS data2; + +CREATE TABLE ids (id UUID, whatever String) Engine=MergeTree ORDER BY tuple(); +INSERT INTO ids VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', 'whatever'); + +CREATE TABLE data (id UUID, event_time DateTime, status String) Engine=MergeTree ORDER BY tuple(); +INSERT INTO data VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-01', 'CREATED'); + +CREATE TABLE data2 (id UUID, event_time DateTime, status String) Engine=MergeTree ORDER BY tuple(); +INSERT INTO data2 VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-02', 'CREATED'); + +SELECT + id, + whatever +FROM ids AS l +INNER JOIN merge(currentDatabase(), 'data*') AS s ON l.id = s.id +WHERE (status IN ['CREATED', 'CREATING']) +ORDER BY event_time DESC +; + +SELECT + id, + whatever +FROM ids AS l +INNER JOIN clusterAllReplicas(test_cluster_two_shards, merge(currentDatabase(), 'data*')) AS s ON l.id = s.id +WHERE (status IN ['CREATED', 'CREATING']) +ORDER BY event_time DESC +; + +SELECT + id, + whatever +FROM ids AS l +INNER JOIN view(SELECT * FROM merge(currentDatabase(), 'data*')) AS s ON l.id = s.id +WHERE (status IN ['CREATED', 'CREATING']) +ORDER BY event_time DESC +; diff --git a/tests/queries/0_stateless/03165_string_functions_with_token_text_indexes.reference b/tests/queries/0_stateless/03165_string_functions_with_token_text_indexes.reference new file mode 100644 index 00000000000..4fb6812cb4f --- /dev/null +++ b/tests/queries/0_stateless/03165_string_functions_with_token_text_indexes.reference @@ -0,0 +1,83 @@ +-------- Bloom filter -------- + +-- No skip for prefix +Parts: 1/1 +Parts: 1/1 +1 Service is not ready + +-- Skip for prefix with complete token +Parts: 1/1 
+Parts: 0/1 + +-- No skip for suffix +Parts: 1/1 +Parts: 1/1 +1 Service is not ready + +-- Skip for suffix with complete token +Parts: 1/1 +Parts: 0/1 + +-- No skip for substring +Parts: 1/1 +Parts: 1/1 +1 Service is not ready + +-- Skip for substring with complete token +Parts: 1/1 +Parts: 0/1 + +-- No skip for multiple substrings +Parts: 1/1 +Parts: 1/1 +1 Service is not ready + +-- Skip for multiple substrings with complete tokens +Parts: 1/1 +Parts: 0/1 + +-- No skip for multiple non-existsing substrings, only one with complete token +Parts: 1/1 +Parts: 1/1 + +-------- GIN filter -------- + +-- No skip for prefix +Parts: 1/1 +Parts: 1/1 +1 Service is not ready + +-- Skip for prefix with complete token +Parts: 1/1 +Parts: 0/1 + +-- No skip for suffix +Parts: 1/1 +Parts: 1/1 +1 Service is not ready + +-- Skip for suffix with complete token +Parts: 1/1 +Parts: 0/1 + +-- No skip for substring +Parts: 1/1 +Parts: 1/1 +1 Service is not ready + +-- Skip for substring with complete token +Parts: 1/1 +Parts: 0/1 + +-- No skip for multiple substrings +Parts: 1/1 +Parts: 1/1 +1 Service is not ready + +-- Skip for multiple substrings with complete tokens +Parts: 1/1 +Parts: 0/1 + +-- No skip for multiple non-existsing substrings, only one with complete token +Parts: 1/1 +Parts: 1/1 diff --git a/tests/queries/0_stateless/03165_string_functions_with_token_text_indexes.sql b/tests/queries/0_stateless/03165_string_functions_with_token_text_indexes.sql new file mode 100644 index 00000000000..bae98bd1eb6 --- /dev/null +++ b/tests/queries/0_stateless/03165_string_functions_with_token_text_indexes.sql @@ -0,0 +1,231 @@ +SELECT '-------- Bloom filter --------'; +SELECT ''; +DROP TABLE IF EXISTS 03165_token_bf; + +SET allow_experimental_full_text_index=1; + +CREATE TABLE 03165_token_bf +( + id Int64, + message String, + INDEX idx_message message TYPE tokenbf_v1(32768, 3, 2) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO 03165_token_bf VALUES(1, 'Service is not 
ready'); + +SELECT '-- No skip for prefix'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_bf WHERE startsWith(message, 'Serv') +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_bf WHERE startsWith(message, 'Serv'); + +SELECT ''; +SELECT '-- Skip for prefix with complete token'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_bf WHERE startsWith(message, 'Serv i') +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_bf WHERE startsWith(message, 'Serv i'); + +SELECT ''; +SELECT '-- No skip for suffix'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_bf WHERE endsWith(message, 'eady') +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_bf WHERE endsWith(message, 'eady'); + +SELECT ''; +SELECT '-- Skip for suffix with complete token'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_bf WHERE endsWith(message, ' eady') +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_bf WHERE endsWith(message, ' eady'); + +SELECT ''; +SELECT '-- No skip for substring'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_bf WHERE match(message, 'no') +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_bf WHERE match(message, 'no'); + +SELECT ''; +SELECT '-- Skip for substring with complete token'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_bf WHERE match(message, ' xyz ') +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_bf WHERE match(message, ' xyz '); + +SELECT ''; +SELECT '-- No skip for multiple substrings'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_bf WHERE multiSearchAny(message, ['ce', 'no']) +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_bf WHERE multiSearchAny(message, ['ce', 'no']); + +SELECT ''; +SELECT '-- Skip for multiple substrings with complete 
tokens'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_bf WHERE multiSearchAny(message, [' wx ', ' yz ']) +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_bf WHERE multiSearchAny(message, [' wx ', ' yz ']); + +SELECT ''; +SELECT '-- No skip for multiple non-existsing substrings, only one with complete token'; +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_bf WHERE multiSearchAny(message, [' wx ', 'yz']) +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_bf WHERE multiSearchAny(message, [' wx ', 'yz']); + +DROP TABLE IF EXISTS 03165_token_bf; + +SELECT ''; +SELECT '-------- GIN filter --------'; +SELECT ''; + +SET allow_experimental_inverted_index=1; +DROP TABLE IF EXISTS 03165_token_ft; +CREATE TABLE 03165_token_ft +( + id Int64, + message String, + INDEX idx_message message TYPE full_text() GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +-- Full text index works only with full parts. +SETTINGS min_bytes_for_full_part_storage=0; + +INSERT INTO 03165_token_ft VALUES(1, 'Service is not ready'); + +SELECT '-- No skip for prefix'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_ft WHERE startsWith(message, 'Serv') +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_ft WHERE startsWith(message, 'Serv'); + +SELECT ''; +SELECT '-- Skip for prefix with complete token'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_ft WHERE startsWith(message, 'Serv i') +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_ft WHERE startsWith(message, 'Serv i'); + +SELECT ''; +SELECT '-- No skip for suffix'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_ft WHERE endsWith(message, 'eady') +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_ft WHERE endsWith(message, 'eady'); + +SELECT ''; +SELECT '-- Skip for suffix with complete token'; + +SELECT trim(explain) 
+FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_ft WHERE endsWith(message, ' eady') +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_ft WHERE endsWith(message, ' eady'); + +SELECT ''; +SELECT '-- No skip for substring'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_ft WHERE match(message, 'no') +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_ft WHERE match(message, 'no'); + +SELECT ''; +SELECT '-- Skip for substring with complete token'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_ft WHERE match(message, ' xyz ') +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_ft WHERE match(message, ' xyz '); + +SELECT ''; +SELECT '-- No skip for multiple substrings'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_ft WHERE multiSearchAny(message, ['ce', 'no']) +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_ft WHERE multiSearchAny(message, ['ce', 'no']); + +SELECT ''; +SELECT '-- Skip for multiple substrings with complete tokens'; + +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_ft WHERE multiSearchAny(message, [' wx ', ' yz ']) +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_ft WHERE multiSearchAny(message, [' wx ', ' yz ']); + +SELECT ''; +SELECT '-- No skip for multiple non-existsing substrings, only one with complete token'; +SELECT trim(explain) +FROM ( + EXPLAIN indexes = 1 SELECT * FROM 03165_token_ft WHERE multiSearchAny(message, [' wx ', 'yz']) +) +WHERE explain LIKE '%Parts:%'; + +SELECT * FROM 03165_token_ft WHERE multiSearchAny(message, [' wx ', 'yz']); diff --git a/tests/queries/0_stateless/03166_mv_prewhere_duplicating_name_bug.reference b/tests/queries/0_stateless/03166_mv_prewhere_duplicating_name_bug.reference new file mode 100644 index 00000000000..b50fdcee209 --- /dev/null +++ 
b/tests/queries/0_stateless/03166_mv_prewhere_duplicating_name_bug.reference @@ -0,0 +1,2 @@ +a b +a b diff --git a/tests/queries/0_stateless/03166_mv_prewhere_duplicating_name_bug.sql b/tests/queries/0_stateless/03166_mv_prewhere_duplicating_name_bug.sql new file mode 100644 index 00000000000..e32d23920dd --- /dev/null +++ b/tests/queries/0_stateless/03166_mv_prewhere_duplicating_name_bug.sql @@ -0,0 +1,7 @@ +create table src (x Int64) engine = Log; +create table dst (s String, lc LowCardinality(String)) engine MergeTree order by s; +create materialized view mv to dst (s String, lc String) as select 'a' as s, toLowCardinality('b') as lc from src; +insert into src values (1); + +select s, lc from mv where not ignore(lc) settings allow_experimental_analyzer=0; +select s, lc from mv where not ignore(lc) settings allow_experimental_analyzer=1; diff --git a/tests/queries/0_stateless/03166_optimize_row_order_during_insert.sql b/tests/queries/0_stateless/03166_optimize_row_order_during_insert.sql index bb2f5e94d05..5fc71598e47 100644 --- a/tests/queries/0_stateless/03166_optimize_row_order_during_insert.sql +++ b/tests/queries/0_stateless/03166_optimize_row_order_during_insert.sql @@ -14,7 +14,7 @@ CREATE TABLE tab ( event Int8 ) ENGINE = MergeTree ORDER BY name -SETTINGS allow_experimental_optimized_row_order = true; +SETTINGS optimize_row_order = true; INSERT INTO tab VALUES ('Igor', 3), ('Egor', 1), ('Egor', 2), ('Igor', 2), ('Igor', 1); SELECT * FROM tab ORDER BY name SETTINGS max_threads=1; @@ -34,7 +34,7 @@ CREATE TABLE tab ( flag String ) ENGINE = MergeTree ORDER BY () -SETTINGS allow_experimental_optimized_row_order = True; +SETTINGS optimize_row_order = True; INSERT INTO tab VALUES ('Bob', 4, 100, '1'), ('Nikita', 2, 54, '1'), ('Nikita', 1, 228, '1'), ('Alex', 4, 83, '1'), ('Alex', 4, 134, '1'), ('Alex', 1, 65, '0'), ('Alex', 4, 134, '1'), ('Bob', 2, 53, '0'), ('Alex', 4, 83, '0'), ('Alex', 1, 63, '1'), ('Bob', 2, 53, '1'), ('Alex', 4, 192, '1'), ('Alex', 2, 
128, '1'), ('Nikita', 2, 148, '0'), ('Bob', 4, 177, '0'), ('Nikita', 1, 173, '0'), ('Alex', 1, 239, '0'), ('Alex', 1, 63, '0'), ('Alex', 2, 224, '1'), ('Bob', 4, 177, '0'), ('Alex', 2, 128, '1'), ('Alex', 4, 134, '0'), ('Alex', 4, 83, '1'), ('Bob', 4, 100, '0'), ('Nikita', 2, 54, '1'), ('Alex', 1, 239, '1'), ('Bob', 2, 187, '1'), ('Alex', 1, 65, '1'), ('Bob', 2, 53, '1'), ('Alex', 2, 224, '0'), ('Alex', 4, 192, '0'), ('Nikita', 1, 173, '1'), ('Nikita', 2, 148, '1'), ('Bob', 2, 187, '1'), ('Nikita', 2, 208, '1'), ('Nikita', 2, 208, '0'), ('Nikita', 1, 228, '0'), ('Nikita', 2, 148, '0'); SELECT * FROM tab SETTINGS max_threads=1; @@ -58,7 +58,7 @@ CREATE TABLE tab ( flag Nullable(Int32) ) ENGINE = MergeTree ORDER BY (flag, money) -SETTINGS allow_experimental_optimized_row_order = True, allow_nullable_key = True; +SETTINGS optimize_row_order = True, allow_nullable_key = True; INSERT INTO tab VALUES ('AB', 0, 42, Null), ('AB', 0, 42, Null), ('A', 1, 42, Null), ('AB', 1, 9.81, 0), ('B', 0, 42, Null), ('B', -1, 3.14, Null), ('B', 1, 2.7, 1), ('B', 0, 42, 1), ('A', 1, 42, 1), ('B', 1, 42, Null), ('B', 0, 2.7, 1), ('A', 0, 2.7, 1), ('B', 2, 3.14, Null), ('A', 0, 3.14, Null), ('A', 1, 2.7, 1), ('A', 1, 42, Null); SELECT * FROM tab ORDER BY (flag, money) SETTINGS max_threads=1; @@ -89,7 +89,7 @@ CREATE TABLE tab ( tuple_column Tuple(UInt256) ) ENGINE = MergeTree() ORDER BY (fixed_str, event_date) -SETTINGS allow_experimental_optimized_row_order = True; +SETTINGS optimize_row_order = True; INSERT INTO tab VALUES ('A', '2020-01-01', [0.0, 1.1], 10, 'some string', {'key':'value'}, (123)), ('A', '2020-01-01', [0.0, 1.1], NULL, 'example', {}, (26)), ('A', '2020-01-01', [2.2, 1.1], 1, 'some other string', {'key2':'value2'}, (5)), ('A', '2020-01-02', [0.0, 1.1], 10, 'some string', {'key':'value'}, (123)), ('A', '2020-01-02', [0.0, 2.2], 10, 'example', {}, (26)), ('A', '2020-01-02', [2.2, 1.1], 1, 'some other string', {'key2':'value2'}, (5)), ('B', '2020-01-04', [0.0, 1.1], 10, 'some 
string', {'key':'value'}, (123)), ('B', '2020-01-04', [0.0, 2.2], Null, 'example', {}, (26)), ('B', '2020-01-04', [2.2, 1.1], 1, 'some string', {'key2':'value2'}, (5)), ('B', '2020-01-05', [0.0, 1.1], 10, 'some string', {'key':'value'}, (123)), ('B', '2020-01-05', [0.0, 2.2], Null, 'example', {}, (26)), ('B', '2020-01-05', [2.2, 1.1], 1, 'some other string', {'key':'value'}, (5)), ('C', '2020-01-04', [0.0, 1.1], 10, 'some string', {'key':'value'}, (5)), ('C', '2020-01-04', [0.0, 2.2], Null, 'example', {}, (26)), ('C', '2020-01-04', [2.2, 1.1], 1, 'some other string', {'key2':'value2'}, (5)); diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.reference b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.reference new file mode 100644 index 00000000000..86f79bea4ba --- /dev/null +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.reference @@ -0,0 +1,33 @@ +200 +Expression ((Project names + Projection)) + Aggregating + Expression (Before GROUP BY) + Filter ((WHERE + Change column names to column identifiers)) + ReadFromMergeTree (default.t_ind_merge_1) + Indexes: + PrimaryKey + Condition: true + Parts: 2/2 + Granules: 32/32 + Skip + Name: idx_b + Description: minmax GRANULARITY 1 + Parts: 2/2 + Granules: 4/32 +200 +Expression ((Project names + Projection)) + Aggregating + Expression (Before GROUP BY) + Filter ((WHERE + Change column names to column identifiers)) + ReadFromMergeTree (default.t_ind_merge_1) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 32/32 + Skip + Name: idx_b + Description: minmax GRANULARITY 1 + Parts: 1/1 + Granules: 4/32 +4 1 3 diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql new file mode 100644 index 00000000000..d3e3b38a3cb --- /dev/null +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql @@ -0,0 +1,39 @@ +DROP TABLE IF EXISTS t_ind_merge_1; + +SET 
allow_experimental_analyzer = 1; + +CREATE TABLE t_ind_merge_1 (a UInt64, b UInt64, c UInt64, d UInt64, INDEX idx_b b TYPE minmax) +ENGINE = MergeTree +ORDER BY a SETTINGS + index_granularity = 64, + merge_max_block_size = 8192, + vertical_merge_algorithm_min_rows_to_activate = 1, + vertical_merge_algorithm_min_columns_to_activate = 1, + min_bytes_for_wide_part = 0, + min_bytes_for_full_part_storage = 0; + +INSERT INTO t_ind_merge_1 SELECT number, number, rand(), rand() FROM numbers(1000); +INSERT INTO t_ind_merge_1 SELECT number, number, rand(), rand() FROM numbers(1000); + +SELECT count() FROM t_ind_merge_1 WHERE b < 100 SETTINGS force_data_skipping_indices = 'idx_b'; +EXPLAIN indexes = 1 SELECT count() FROM t_ind_merge_1 WHERE b < 100; + +OPTIMIZE TABLE t_ind_merge_1 FINAL; + +SELECT count() FROM t_ind_merge_1 WHERE b < 100 SETTINGS force_data_skipping_indices = 'idx_b'; +EXPLAIN indexes = 1 SELECT count() FROM t_ind_merge_1 WHERE b < 100; + +SYSTEM FLUSH LOGS; + +WITH + (SELECT uuid FROM system.tables WHERE database = currentDatabase() AND table = 't_ind_merge_1') AS uuid, + extractAllGroupsVertical(message, 'containing (\\d+) columns \((\\d+) merged, (\\d+) gathered\)')[1] AS groups +SELECT + groups[1] AS total, + groups[2] AS merged, + groups[3] AS gathered +FROM system.text_log +WHERE ((query_id = uuid || '::all_1_2_1') OR (query_id = currentDatabase() || '.t_ind_merge_1::all_1_2_1')) AND notEmpty(groups) +ORDER BY event_time_microseconds; + +DROP TABLE t_ind_merge_1; diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.reference b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.reference new file mode 100644 index 00000000000..4c2f01294a4 --- /dev/null +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.reference @@ -0,0 +1 @@ +6 3 3 diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql new file mode 100644 index 
00000000000..b749e0c84b0 --- /dev/null +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS t_ind_merge_2; + +CREATE TABLE t_ind_merge_2 ( + a UInt64, + b UInt64, + c UInt64, + d UInt64, + e UInt64, + f UInt64, + INDEX idx_a a TYPE minmax, + INDEX idx_b b TYPE minmax, + INDEX idx_cd c * d TYPE minmax, + INDEX idx_d1 d TYPE minmax, + INDEX idx_d2 d + 7 TYPE set(3), + INDEX idx_e e * 3 TYPE set(3)) +ENGINE = MergeTree +ORDER BY a SETTINGS + index_granularity = 64, + vertical_merge_algorithm_min_rows_to_activate = 1, + vertical_merge_algorithm_min_columns_to_activate = 1, + min_bytes_for_wide_part = 0, + min_bytes_for_full_part_storage = 0; + +INSERT INTO t_ind_merge_2 SELECT number, number, rand(), rand(), rand(), rand() FROM numbers(1000); +INSERT INTO t_ind_merge_2 SELECT number, number, rand(), rand(), rand(), rand() FROM numbers(1000); + +OPTIMIZE TABLE t_ind_merge_2 FINAL; +SYSTEM FLUSH LOGS; + +--- merged: a, c, d; gathered: b, e, f +WITH + (SELECT uuid FROM system.tables WHERE database = currentDatabase() AND table = 't_ind_merge_2') AS uuid, + extractAllGroupsVertical(message, 'containing (\\d+) columns \((\\d+) merged, (\\d+) gathered\)')[1] AS groups +SELECT + groups[1] AS total, + groups[2] AS merged, + groups[3] AS gathered +FROM system.text_log +WHERE ((query_id = uuid || '::all_1_2_1') OR (query_id = currentDatabase() || '.t_ind_merge_2::all_1_2_1')) AND notEmpty(groups) +ORDER BY event_time_microseconds; + +DROP TABLE t_ind_merge_2; diff --git a/tests/queries/0_stateless/03167_base64_url_functions.reference b/tests/queries/0_stateless/03167_base64_url_functions.reference new file mode 100644 index 00000000000..2a0d0013609 --- /dev/null +++ b/tests/queries/0_stateless/03167_base64_url_functions.reference @@ -0,0 +1,10 @@ +https://clickhouse.com aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ https://clickhouse.com https://clickhouse.com +12? MTI_ 12? 12? 
+https://www.google.com/search?q=clickhouse+base64+decode&sca_esv=739f8bb380e4c7ed&ei=TfRiZqCDIrmnwPAP2KLRkA8&ved=0ahUKEwjg3ZHitsmGAxW5ExAIHVhRFPIQ4dUDCBA&uact=5&oq=clickhouse+base64+decode aHR0cHM6Ly93d3cuZ29vZ2xlLmNvbS9zZWFyY2g_cT1jbGlja2hvdXNlK2Jhc2U2NCtkZWNvZGUmc2NhX2Vzdj03MzlmOGJiMzgwZTRjN2VkJmVpPVRmUmlacUNESXJtbndQQVAyS0xSa0E4JnZlZD0wYWhVS0V3amczWkhpdHNtR0F4VzVFeEFJSFZoUkZQSVE0ZFVEQ0JBJnVhY3Q9NSZvcT1jbGlja2hvdXNlK2Jhc2U2NCtkZWNvZGU https://www.google.com/search?q=clickhouse+base64+decode&sca_esv=739f8bb380e4c7ed&ei=TfRiZqCDIrmnwPAP2KLRkA8&ved=0ahUKEwjg3ZHitsmGAxW5ExAIHVhRFPIQ4dUDCBA&uact=5&oq=clickhouse+base64+decode https://www.google.com/search?q=clickhouse+base64+decode&sca_esv=739f8bb380e4c7ed&ei=TfRiZqCDIrmnwPAP2KLRkA8&ved=0ahUKEwjg3ZHitsmGAxW5ExAIHVhRFPIQ4dUDCBA&uact=5&oq=clickhouse+base64+decode +aHR0cHM6Ly9jbGlj https://clic https://clic +aHR0cHM6Ly9jbGlja2g https://clickh https://clickh +aHR0cHM6Ly9jbGljaw https://click https://click + + + +https://clickhouse.com aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ https://clickhouse.com https://clickhouse.com diff --git a/tests/queries/0_stateless/03167_base64_url_functions.sql b/tests/queries/0_stateless/03167_base64_url_functions.sql new file mode 100644 index 00000000000..6c394ba6c3a --- /dev/null +++ b/tests/queries/0_stateless/03167_base64_url_functions.sql @@ -0,0 +1,36 @@ +-- Tags: no-fasttest +-- no-fasttest because aklomp-base64 library is required + +-- incorrect number of arguments +SELECT base64URLEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64URLDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryBase64URLDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64URLEncode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64URLDecode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryBase64URLDecode('foo', 'excess argument'); -- { serverError 
NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +-- test with valid inputs + +SELECT 'https://clickhouse.com' AS original, base64URLEncode(original) AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); +SELECT '12?' AS original, base64URLEncode(original) AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); +SELECT 'https://www.google.com/search?q=clickhouse+base64+decode&sca_esv=739f8bb380e4c7ed&ei=TfRiZqCDIrmnwPAP2KLRkA8&ved=0ahUKEwjg3ZHitsmGAxW5ExAIHVhRFPIQ4dUDCBA&uact=5&oq=clickhouse+base64+decode' AS original, base64URLEncode(original) AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); + +-- encoded value has no padding +SELECT 'aHR0cHM6Ly9jbGlj' AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); +-- encoded value has one-byte padding +SELECT 'aHR0cHM6Ly9jbGlja2g' AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); +-- encoded value has two-bytes padding +SELECT 'aHR0cHM6Ly9jbGljaw' AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); + +-- test with invalid inputs + +SELECT base64URLDecode('https://clickhouse.com'); -- { serverError INCORRECT_DATA } +SELECT tryBase64URLDecode('https://clickhouse.com'); +SELECT base64URLDecode('12?'); -- { serverError INCORRECT_DATA } +SELECT tryBase64URLDecode('12?'); +SELECT base64URLDecode('aHR0cHM6Ly9jbGlja'); -- { serverError INCORRECT_DATA } +SELECT tryBase64URLDecode('aHR0cHM6Ly9jbGlja'); + +-- test FixedString argument + +SELECT toFixedString('https://clickhouse.com', 22) AS original, base64URLEncode(original) AS encoded, base64URLDecode(encoded), tryBase64URLDecode(encoded); diff --git a/tests/queries/0_stateless/03167_boom_filter_index_with_map.reference.j2 b/tests/queries/0_stateless/03167_boom_filter_index_with_map.reference.j2 new file mode 100644 index 00000000000..71dc879f28e --- /dev/null +++ b/tests/queries/0_stateless/03167_boom_filter_index_with_map.reference.j2 @@ -0,0 +1,4 @@ +{% for type in ['Int8', 'Int16', 'Int32', 'Int64', 
'UInt8', 'UInt16', 'UInt32', 'UInt64'] -%} +{'xxx':56} +{56:'xxx'} +{% endfor -%} diff --git a/tests/queries/0_stateless/03167_boom_filter_index_with_map.sql.j2 b/tests/queries/0_stateless/03167_boom_filter_index_with_map.sql.j2 new file mode 100644 index 00000000000..4147bd84e8e --- /dev/null +++ b/tests/queries/0_stateless/03167_boom_filter_index_with_map.sql.j2 @@ -0,0 +1,31 @@ +DROP TABLE IF EXISTS boom_filter_map_1; +DROP TABLE IF EXISTS boom_filter_map_2; + +{% for type in ['Int8', 'Int16', 'Int32', 'Int64', 'UInt8', 'UInt16', 'UInt32', 'UInt64'] -%} + +CREATE TABLE boom_filter_map_1 +( + `m` Map(String, {{ type }}), + INDEX index_models_value_bloom_filter mapValues(m) TYPE bloom_filter GRANULARITY 1 +) + ENGINE = MergeTree +ORDER BY tuple(); + +CREATE TABLE boom_filter_map_2 +( + `m` Map({{ type }}, String), + INDEX index_models_value_bloom_filter mapKeys(m) TYPE bloom_filter GRANULARITY 1 +) + ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO boom_filter_map_1 (m) values (map('xxx', 56)); +INSERT INTO boom_filter_map_2 (m) values (map(56, 'xxx')); + +SELECT m FROM boom_filter_map_1 WHERE (m['xxx']) = 56; +SELECT m FROM boom_filter_map_2 WHERE (m[56]) = 'xxx'; + +DROP TABLE IF EXISTS boom_filter_map_1; +DROP TABLE IF EXISTS boom_filter_map_2; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03167_empty_tuple_concat.reference b/tests/queries/0_stateless/03167_empty_tuple_concat.reference new file mode 100644 index 00000000000..6a452c185a8 --- /dev/null +++ b/tests/queries/0_stateless/03167_empty_tuple_concat.reference @@ -0,0 +1 @@ +() diff --git a/tests/queries/0_stateless/03167_empty_tuple_concat.sql b/tests/queries/0_stateless/03167_empty_tuple_concat.sql new file mode 100644 index 00000000000..f6fce86f332 --- /dev/null +++ b/tests/queries/0_stateless/03167_empty_tuple_concat.sql @@ -0,0 +1 @@ +SELECT ()||(); diff --git a/tests/queries/0_stateless/03167_fancy_quotes_off_by_one.reference 
b/tests/queries/0_stateless/03167_fancy_quotes_off_by_one.reference new file mode 100644 index 00000000000..9daeafb9864 --- /dev/null +++ b/tests/queries/0_stateless/03167_fancy_quotes_off_by_one.reference @@ -0,0 +1 @@ +test diff --git a/tests/queries/0_stateless/03167_fancy_quotes_off_by_one.sql b/tests/queries/0_stateless/03167_fancy_quotes_off_by_one.sql new file mode 100644 index 00000000000..6f563d8f2a1 --- /dev/null +++ b/tests/queries/0_stateless/03167_fancy_quotes_off_by_one.sql @@ -0,0 +1 @@ +SELECT ‘test’ AS “column” \ No newline at end of file diff --git a/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference b/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference new file mode 100644 index 00000000000..951910bbe74 --- /dev/null +++ b/tests/queries/0_stateless/03167_parametrized_view_with_cte.reference @@ -0,0 +1,5 @@ +OK +123 +123 +123 +123 diff --git a/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql b/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql new file mode 100644 index 00000000000..1ac5540047a --- /dev/null +++ b/tests/queries/0_stateless/03167_parametrized_view_with_cte.sql @@ -0,0 +1,7 @@ +SET allow_experimental_analyzer=1; +CREATE OR REPLACE VIEW param_test AS SELECT {test_str:String} as s_result; +WITH 'OK' AS s SELECT * FROM param_test(test_str=s); +WITH (SELECT 123) AS s SELECT * FROM param_test(test_str=s); +WITH (SELECT 100 + 20 + 3) AS s SELECT * FROM param_test(test_str=s); +WITH (SELECT number FROM numbers(123, 1)) AS s SELECT * FROM param_test(test_str=s); +WITH CAST(123, 'String') AS s SELECT * FROM param_test(test_str=s); diff --git a/tests/queries/0_stateless/03167_transactions_are_really_disabled.reference b/tests/queries/0_stateless/03167_transactions_are_really_disabled.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03167_transactions_are_really_disabled.sql 
b/tests/queries/0_stateless/03167_transactions_are_really_disabled.sql new file mode 100644 index 00000000000..e3c86a2d5be --- /dev/null +++ b/tests/queries/0_stateless/03167_transactions_are_really_disabled.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS mv_table; +DROP TABLE IF EXISTS null_table; + +CREATE TABLE null_table (str String) ENGINE = Null; +CREATE MATERIALIZED VIEW mv_table (str String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/transactions_disabled_rmt', '{replica}') ORDER BY str AS SELECT str AS str FROM null_table; + +SET implicit_transaction=1; +set throw_on_unsupported_query_inside_transaction=0; + +INSERT INTO null_table VALUES ('test'); --{serverError NOT_IMPLEMENTED} + +DROP TABLE IF EXISTS mv_table; +DROP TABLE IF EXISTS null_table; diff --git a/tests/queries/0_stateless/03168_cld2_tsan.reference b/tests/queries/0_stateless/03168_cld2_tsan.reference new file mode 100644 index 00000000000..6c3cafd4a6d --- /dev/null +++ b/tests/queries/0_stateless/03168_cld2_tsan.reference @@ -0,0 +1,2 @@ +{'ja':0.62,'fr':0.36} +{'ja':0.62,'fr':0.36} diff --git a/tests/queries/0_stateless/03168_cld2_tsan.sql b/tests/queries/0_stateless/03168_cld2_tsan.sql new file mode 100644 index 00000000000..701a781c472 --- /dev/null +++ b/tests/queries/0_stateless/03168_cld2_tsan.sql @@ -0,0 +1,10 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: depends on cld2 + +-- https://github.com/ClickHouse/ClickHouse/issues/64931 +SELECT detectLanguageMixed(materialize('二兎を追う者は一兎をも得ず二兎を追う者は一兎をも得ず A vaincre sans peril, on triomphe sans gloire.')) +GROUP BY + GROUPING SETS ( + ('a', toUInt256(1)), + (stringToH3(toFixedString(toFixedString('85283473ffffff', 14), 14)))) +SETTINGS allow_experimental_nlp_functions = 1; diff --git a/tests/queries/0_stateless/03168_fuzz_multiIf_short_circuit.reference b/tests/queries/0_stateless/03168_fuzz_multiIf_short_circuit.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/queries/0_stateless/03168_fuzz_multiIf_short_circuit.sql b/tests/queries/0_stateless/03168_fuzz_multiIf_short_circuit.sql new file mode 100644 index 00000000000..4e4cc291e9b --- /dev/null +++ b/tests/queries/0_stateless/03168_fuzz_multiIf_short_circuit.sql @@ -0,0 +1,6 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/64946 +SELECT + multiIf((number % toLowCardinality(toNullable(toUInt128(2)))) = (number % toNullable(2)), toInt8(1), (number % materialize(toLowCardinality(3))) = toUInt128(toNullable(0)), toInt8(materialize(materialize(2))), toInt64(toUInt128(3))) +FROM system.numbers +LIMIT 44857 +FORMAT Null; diff --git a/tests/queries/0_stateless/03168_inconsistent_ast_formatting.reference b/tests/queries/0_stateless/03168_inconsistent_ast_formatting.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03168_inconsistent_ast_formatting.sql b/tests/queries/0_stateless/03168_inconsistent_ast_formatting.sql new file mode 100644 index 00000000000..5333ea29ce7 --- /dev/null +++ b/tests/queries/0_stateless/03168_inconsistent_ast_formatting.sql @@ -0,0 +1,7 @@ +create table a (x `Null`); -- { clientError SYNTAX_ERROR } +create table a (x f(`Null`)); -- { clientError SYNTAX_ERROR } +create table a (x Enum8(f(`Null`, 'World', 2))); -- { clientError SYNTAX_ERROR } +create table a (`value2` Enum8('Hello' = 1, equals(`Null`, 'World', 2), '!' 
= 3)); -- { clientError SYNTAX_ERROR } + +create table a (x Int8) engine Memory; +create table b empty as a; diff --git a/tests/queries/0_stateless/03168_loop_engine_with_parallel_replicas.reference b/tests/queries/0_stateless/03168_loop_engine_with_parallel_replicas.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03168_loop_engine_with_parallel_replicas.sql b/tests/queries/0_stateless/03168_loop_engine_with_parallel_replicas.sql new file mode 100644 index 00000000000..da4626ad897 --- /dev/null +++ b/tests/queries/0_stateless/03168_loop_engine_with_parallel_replicas.sql @@ -0,0 +1,11 @@ +-- Tags: no-parallel + +DROP DATABASE IF EXISTS 03147_db; +CREATE DATABASE IF NOT EXISTS 03147_db; +CREATE TABLE 03147_db.t (n Int8) ENGINE=MergeTree ORDER BY n; +INSERT INTO 03147_db.t SELECT * FROM numbers(10); +USE 03147_db; + +SET allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'parallel_replicas', max_parallel_replicas = 100; + +SELECT * FROM loop(03147_db.t) LIMIT 15 FORMAT Null; diff --git a/tests/queries/0_stateless/03168_query_log_privileges_not_empty.reference b/tests/queries/0_stateless/03168_query_log_privileges_not_empty.reference new file mode 100644 index 00000000000..e3ac97f9945 --- /dev/null +++ b/tests/queries/0_stateless/03168_query_log_privileges_not_empty.reference @@ -0,0 +1,5 @@ +1 +3168 8613 +[] ['SELECT(a, b) ON default.d_03168_query_log'] +[] [] +['SELECT(a, b) ON default.d_03168_query_log'] [] diff --git a/tests/queries/0_stateless/03168_query_log_privileges_not_empty.sh b/tests/queries/0_stateless/03168_query_log_privileges_not_empty.sh new file mode 100755 index 00000000000..9abc635a874 --- /dev/null +++ b/tests/queries/0_stateless/03168_query_log_privileges_not_empty.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck 
source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +user_name="u_03168_query_log" +table_name="default.d_03168_query_log" +test_query="select a, b from ${table_name}" + +${CLICKHOUSE_CLIENT_BINARY} --query "drop user if exists ${user_name}" +${CLICKHOUSE_CLIENT_BINARY} --query "create user ${user_name}" +${CLICKHOUSE_CLIENT_BINARY} --query "drop table if exists ${table_name}" +${CLICKHOUSE_CLIENT_BINARY} --query "create table ${table_name} (a UInt64, b UInt64) order by a" + +${CLICKHOUSE_CLIENT_BINARY} --query "insert into table ${table_name} values (3168, 8613)" + +error="$(${CLICKHOUSE_CLIENT_BINARY} --user ${user_name} --query "${test_query}" 2>&1 >/dev/null)" +echo "${error}" | grep -Fc "ACCESS_DENIED" + +${CLICKHOUSE_CLIENT_BINARY} --query "grant select(a, b) on ${table_name} to ${user_name}" + +${CLICKHOUSE_CLIENT_BINARY} --user ${user_name} --query "${test_query}" + +${CLICKHOUSE_CLIENT_BINARY} --query "system flush logs" +${CLICKHOUSE_CLIENT_BINARY} --query "select used_privileges, missing_privileges from system.query_log where query = '${test_query}' and type = 'ExceptionBeforeStart' and current_database = currentDatabase() order by event_time desc limit 1" +${CLICKHOUSE_CLIENT_BINARY} --query "select used_privileges, missing_privileges from system.query_log where query = '${test_query}' and type = 'QueryStart' and current_database = currentDatabase() order by event_time desc limit 1" +${CLICKHOUSE_CLIENT_BINARY} --query "select used_privileges, missing_privileges from system.query_log where query = '${test_query}' and type = 'QueryFinish' and current_database = currentDatabase() order by event_time desc limit 1" + +${CLICKHOUSE_CLIENT_BINARY} --query "drop table ${table_name}" +${CLICKHOUSE_CLIENT_BINARY} --query "drop user ${user_name}" diff --git a/tests/queries/0_stateless/03169_cache_complex_dict_short_circuit_bug.reference b/tests/queries/0_stateless/03169_cache_complex_dict_short_circuit_bug.reference new file mode 100644 index 
00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03169_cache_complex_dict_short_circuit_bug.sql b/tests/queries/0_stateless/03169_cache_complex_dict_short_circuit_bug.sql new file mode 100644 index 00000000000..f91aaf39081 --- /dev/null +++ b/tests/queries/0_stateless/03169_cache_complex_dict_short_circuit_bug.sql @@ -0,0 +1,31 @@ +DROP TABLE IF EXISTS complex_key_simple_attributes_source_short_circuit_table; +DROP DICTIONARY IF EXISTS cache_dictionary_complex_key_simple_attributes_short_circuit; + +CREATE TABLE complex_key_simple_attributes_source_short_circuit_table +( + id UInt64, + id_key String, + value_first String, + value_second String +) + ENGINE = TinyLog; + +INSERT INTO complex_key_simple_attributes_source_short_circuit_table VALUES(0, 'id_key_0', 'value_0', 'value_second_0'); + +CREATE DICTIONARY cache_dictionary_complex_key_simple_attributes_short_circuit +( + `id` UInt64, + `id_key` String, + `value_first` String DEFAULT 'value_first_default', + `value_second` String DEFAULT 'value_second_default' +) +PRIMARY KEY id, id_key +SOURCE(CLICKHOUSE(TABLE 'complex_key_simple_attributes_source_short_circuit_table')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 10)); + +SELECT dictGetOrDefault('cache_dictionary_complex_key_simple_attributes_short_circuit', 'value_first', (number, concat(toString(number))), toString(materialize('default'))) AS value_first FROM system.numbers LIMIT 20 FORMAT Null; +SELECT dictGetOrDefault('cache_dictionary_complex_key_simple_attributes_short_circuit', 'value_first', (number, concat(toString(number))), toString(materialize('default'))) AS value_first FROM system.numbers LIMIT 20 FORMAT Null; + +DROP DICTIONARY IF EXISTS cache_dictionary_complex_key_simple_attributes_short_circuit; +DROP TABLE IF EXISTS complex_key_simple_attributes_source_short_circuit_table; diff --git a/tests/queries/0_stateless/03169_display_column_names_in_footer.reference 
b/tests/queries/0_stateless/03169_display_column_names_in_footer.reference new file mode 100644 index 00000000000..7a9f413a900 --- /dev/null +++ b/tests/queries/0_stateless/03169_display_column_names_in_footer.reference @@ -0,0 +1,2382 @@ + ┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ + ┃ number ┃ toTypeName(number) ┃ + ┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩ + 1. │ 0 │ UInt64 │ + ├────────┼────────────────────┤ + 2. │ 1 │ UInt64 │ + ├────────┼────────────────────┤ + 3. │ 2 │ UInt64 │ + ├────────┼────────────────────┤ + 4. │ 3 │ UInt64 │ + ├────────┼────────────────────┤ + 5. │ 4 │ UInt64 │ + ├────────┼────────────────────┤ + 6. │ 5 │ UInt64 │ + ├────────┼────────────────────┤ + 7. │ 6 │ UInt64 │ + ├────────┼────────────────────┤ + 8. │ 7 │ UInt64 │ + ├────────┼────────────────────┤ + 9. │ 8 │ UInt64 │ + ├────────┼────────────────────┤ +10. │ 9 │ UInt64 │ + ├────────┼────────────────────┤ +11. │ 10 │ UInt64 │ + ├────────┼────────────────────┤ +12. │ 11 │ UInt64 │ + ├────────┼────────────────────┤ +13. │ 12 │ UInt64 │ + ├────────┼────────────────────┤ +14. │ 13 │ UInt64 │ + ├────────┼────────────────────┤ +15. │ 14 │ UInt64 │ + ├────────┼────────────────────┤ +16. │ 15 │ UInt64 │ + ├────────┼────────────────────┤ +17. │ 16 │ UInt64 │ + ├────────┼────────────────────┤ +18. │ 17 │ UInt64 │ + ├────────┼────────────────────┤ +19. │ 18 │ UInt64 │ + ├────────┼────────────────────┤ +20. │ 19 │ UInt64 │ + ├────────┼────────────────────┤ +21. │ 20 │ UInt64 │ + ├────────┼────────────────────┤ +22. │ 21 │ UInt64 │ + ├────────┼────────────────────┤ +23. │ 22 │ UInt64 │ + ├────────┼────────────────────┤ +24. │ 23 │ UInt64 │ + ├────────┼────────────────────┤ +25. │ 24 │ UInt64 │ + ├────────┼────────────────────┤ +26. │ 25 │ UInt64 │ + ├────────┼────────────────────┤ +27. │ 26 │ UInt64 │ + ├────────┼────────────────────┤ +28. │ 27 │ UInt64 │ + ├────────┼────────────────────┤ +29. │ 28 │ UInt64 │ + ├────────┼────────────────────┤ +30. │ 29 │ UInt64 │ + ├────────┼────────────────────┤ +31. 
│ 30 │ UInt64 │ + ├────────┼────────────────────┤ +32. │ 31 │ UInt64 │ + ├────────┼────────────────────┤ +33. │ 32 │ UInt64 │ + ├────────┼────────────────────┤ +34. │ 33 │ UInt64 │ + ├────────┼────────────────────┤ +35. │ 34 │ UInt64 │ + ├────────┼────────────────────┤ +36. │ 35 │ UInt64 │ + ├────────┼────────────────────┤ +37. │ 36 │ UInt64 │ + ├────────┼────────────────────┤ +38. │ 37 │ UInt64 │ + ├────────┼────────────────────┤ +39. │ 38 │ UInt64 │ + ├────────┼────────────────────┤ +40. │ 39 │ UInt64 │ + ├────────┼────────────────────┤ +41. │ 40 │ UInt64 │ + ├────────┼────────────────────┤ +42. │ 41 │ UInt64 │ + ├────────┼────────────────────┤ +43. │ 42 │ UInt64 │ + ├────────┼────────────────────┤ +44. │ 43 │ UInt64 │ + ├────────┼────────────────────┤ +45. │ 44 │ UInt64 │ + ├────────┼────────────────────┤ +46. │ 45 │ UInt64 │ + ├────────┼────────────────────┤ +47. │ 46 │ UInt64 │ + ├────────┼────────────────────┤ +48. │ 47 │ UInt64 │ + ├────────┼────────────────────┤ +49. │ 48 │ UInt64 │ + └────────┴────────────────────┘ + ┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ + ┃ number ┃ toTypeName(number) ┃ + ┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩ + 1. │ 0 │ UInt64 │ + ├────────┼────────────────────┤ + 2. │ 1 │ UInt64 │ + ├────────┼────────────────────┤ + 3. │ 2 │ UInt64 │ + ├────────┼────────────────────┤ + 4. │ 3 │ UInt64 │ + ├────────┼────────────────────┤ + 5. │ 4 │ UInt64 │ + ├────────┼────────────────────┤ + 6. │ 5 │ UInt64 │ + ├────────┼────────────────────┤ + 7. │ 6 │ UInt64 │ + ├────────┼────────────────────┤ + 8. │ 7 │ UInt64 │ + ├────────┼────────────────────┤ + 9. │ 8 │ UInt64 │ + ├────────┼────────────────────┤ +10. │ 9 │ UInt64 │ + ┣━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━┫ + ┃ number ┃ toTypeName(number) ┃ + ┗━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━┛ + ┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ + ┃ number ┃ toTypeName(number) ┃ + ┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩ + 1. │ 0 │ UInt64 │ + ├────────┼────────────────────┤ + 2. │ 1 │ UInt64 │ + ├────────┼────────────────────┤ + 3. 
│ 2 │ UInt64 │ + ├────────┼────────────────────┤ + 4. │ 3 │ UInt64 │ + ├────────┼────────────────────┤ + 5. │ 4 │ UInt64 │ + ├────────┼────────────────────┤ + 6. │ 5 │ UInt64 │ + ├────────┼────────────────────┤ + 7. │ 6 │ UInt64 │ + ├────────┼────────────────────┤ + 8. │ 7 │ UInt64 │ + ├────────┼────────────────────┤ + 9. │ 8 │ UInt64 │ + ├────────┼────────────────────┤ + 10. │ 9 │ UInt64 │ + ├────────┼────────────────────┤ + 11. │ 10 │ UInt64 │ + ├────────┼────────────────────┤ + 12. │ 11 │ UInt64 │ + ├────────┼────────────────────┤ + 13. │ 12 │ UInt64 │ + ├────────┼────────────────────┤ + 14. │ 13 │ UInt64 │ + ├────────┼────────────────────┤ + 15. │ 14 │ UInt64 │ + ├────────┼────────────────────┤ + 16. │ 15 │ UInt64 │ + ├────────┼────────────────────┤ + 17. │ 16 │ UInt64 │ + ├────────┼────────────────────┤ + 18. │ 17 │ UInt64 │ + ├────────┼────────────────────┤ + 19. │ 18 │ UInt64 │ + ├────────┼────────────────────┤ + 20. │ 19 │ UInt64 │ + ├────────┼────────────────────┤ + 21. │ 20 │ UInt64 │ + ├────────┼────────────────────┤ + 22. │ 21 │ UInt64 │ + ├────────┼────────────────────┤ + 23. │ 22 │ UInt64 │ + ├────────┼────────────────────┤ + 24. │ 23 │ UInt64 │ + ├────────┼────────────────────┤ + 25. │ 24 │ UInt64 │ + ├────────┼────────────────────┤ + 26. │ 25 │ UInt64 │ + ├────────┼────────────────────┤ + 27. │ 26 │ UInt64 │ + ├────────┼────────────────────┤ + 28. │ 27 │ UInt64 │ + ├────────┼────────────────────┤ + 29. │ 28 │ UInt64 │ + ├────────┼────────────────────┤ + 30. │ 29 │ UInt64 │ + ├────────┼────────────────────┤ + 31. │ 30 │ UInt64 │ + ├────────┼────────────────────┤ + 32. │ 31 │ UInt64 │ + ├────────┼────────────────────┤ + 33. │ 32 │ UInt64 │ + ├────────┼────────────────────┤ + 34. │ 33 │ UInt64 │ + ├────────┼────────────────────┤ + 35. │ 34 │ UInt64 │ + ├────────┼────────────────────┤ + 36. │ 35 │ UInt64 │ + ├────────┼────────────────────┤ + 37. │ 36 │ UInt64 │ + ├────────┼────────────────────┤ + 38. 
│ 37 │ UInt64 │ + ├────────┼────────────────────┤ + 39. │ 38 │ UInt64 │ + ├────────┼────────────────────┤ + 40. │ 39 │ UInt64 │ + ├────────┼────────────────────┤ + 41. │ 40 │ UInt64 │ + ├────────┼────────────────────┤ + 42. │ 41 │ UInt64 │ + ├────────┼────────────────────┤ + 43. │ 42 │ UInt64 │ + ├────────┼────────────────────┤ + 44. │ 43 │ UInt64 │ + ├────────┼────────────────────┤ + 45. │ 44 │ UInt64 │ + ├────────┼────────────────────┤ + 46. │ 45 │ UInt64 │ + ├────────┼────────────────────┤ + 47. │ 46 │ UInt64 │ + ├────────┼────────────────────┤ + 48. │ 47 │ UInt64 │ + ├────────┼────────────────────┤ + 49. │ 48 │ UInt64 │ + ├────────┼────────────────────┤ + 50. │ 49 │ UInt64 │ + ├────────┼────────────────────┤ + 51. │ 50 │ UInt64 │ + ├────────┼────────────────────┤ + 52. │ 51 │ UInt64 │ + ├────────┼────────────────────┤ + 53. │ 52 │ UInt64 │ + ├────────┼────────────────────┤ + 54. │ 53 │ UInt64 │ + ├────────┼────────────────────┤ + 55. │ 54 │ UInt64 │ + ├────────┼────────────────────┤ + 56. │ 55 │ UInt64 │ + ├────────┼────────────────────┤ + 57. │ 56 │ UInt64 │ + ├────────┼────────────────────┤ + 58. │ 57 │ UInt64 │ + ├────────┼────────────────────┤ + 59. │ 58 │ UInt64 │ + ├────────┼────────────────────┤ + 60. │ 59 │ UInt64 │ + ├────────┼────────────────────┤ + 61. │ 60 │ UInt64 │ + ├────────┼────────────────────┤ + 62. │ 61 │ UInt64 │ + ├────────┼────────────────────┤ + 63. │ 62 │ UInt64 │ + ├────────┼────────────────────┤ + 64. │ 63 │ UInt64 │ + ├────────┼────────────────────┤ + 65. │ 64 │ UInt64 │ + ├────────┼────────────────────┤ + 66. │ 65 │ UInt64 │ + ├────────┼────────────────────┤ + 67. │ 66 │ UInt64 │ + ├────────┼────────────────────┤ + 68. │ 67 │ UInt64 │ + ├────────┼────────────────────┤ + 69. │ 68 │ UInt64 │ + ├────────┼────────────────────┤ + 70. │ 69 │ UInt64 │ + ├────────┼────────────────────┤ + 71. │ 70 │ UInt64 │ + ├────────┼────────────────────┤ + 72. │ 71 │ UInt64 │ + ├────────┼────────────────────┤ + 73. 
│ 72 │ UInt64 │ + ├────────┼────────────────────┤ + 74. │ 73 │ UInt64 │ + ├────────┼────────────────────┤ + 75. │ 74 │ UInt64 │ + ├────────┼────────────────────┤ + 76. │ 75 │ UInt64 │ + ├────────┼────────────────────┤ + 77. │ 76 │ UInt64 │ + ├────────┼────────────────────┤ + 78. │ 77 │ UInt64 │ + ├────────┼────────────────────┤ + 79. │ 78 │ UInt64 │ + ├────────┼────────────────────┤ + 80. │ 79 │ UInt64 │ + ├────────┼────────────────────┤ + 81. │ 80 │ UInt64 │ + ├────────┼────────────────────┤ + 82. │ 81 │ UInt64 │ + ├────────┼────────────────────┤ + 83. │ 82 │ UInt64 │ + ├────────┼────────────────────┤ + 84. │ 83 │ UInt64 │ + ├────────┼────────────────────┤ + 85. │ 84 │ UInt64 │ + ├────────┼────────────────────┤ + 86. │ 85 │ UInt64 │ + ├────────┼────────────────────┤ + 87. │ 86 │ UInt64 │ + ├────────┼────────────────────┤ + 88. │ 87 │ UInt64 │ + ├────────┼────────────────────┤ + 89. │ 88 │ UInt64 │ + ├────────┼────────────────────┤ + 90. │ 89 │ UInt64 │ + ├────────┼────────────────────┤ + 91. │ 90 │ UInt64 │ + ├────────┼────────────────────┤ + 92. │ 91 │ UInt64 │ + ├────────┼────────────────────┤ + 93. │ 92 │ UInt64 │ + ├────────┼────────────────────┤ + 94. │ 93 │ UInt64 │ + ├────────┼────────────────────┤ + 95. │ 94 │ UInt64 │ + ├────────┼────────────────────┤ + 96. │ 95 │ UInt64 │ + ├────────┼────────────────────┤ + 97. │ 96 │ UInt64 │ + ├────────┼────────────────────┤ + 98. │ 97 │ UInt64 │ + ├────────┼────────────────────┤ + 99. │ 98 │ UInt64 │ + ├────────┼────────────────────┤ +100. │ 99 │ UInt64 │ + └────────┴────────────────────┘ + ┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ + ┃ number ┃ toTypeName(number) ┃ + ┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩ + 1. │ 0 │ UInt64 │ + ├────────┼────────────────────┤ + 2. │ 1 │ UInt64 │ + ├────────┼────────────────────┤ + 3. │ 2 │ UInt64 │ + ├────────┼────────────────────┤ + 4. │ 3 │ UInt64 │ + ├────────┼────────────────────┤ + 5. │ 4 │ UInt64 │ + ├────────┼────────────────────┤ + 6. │ 5 │ UInt64 │ + ├────────┼────────────────────┤ + 7. 
│ 6 │ UInt64 │ + ├────────┼────────────────────┤ + 8. │ 7 │ UInt64 │ + ├────────┼────────────────────┤ + 9. │ 8 │ UInt64 │ + ├────────┼────────────────────┤ + 10. │ 9 │ UInt64 │ + ├────────┼────────────────────┤ + 11. │ 10 │ UInt64 │ + ├────────┼────────────────────┤ + 12. │ 11 │ UInt64 │ + ├────────┼────────────────────┤ + 13. │ 12 │ UInt64 │ + ├────────┼────────────────────┤ + 14. │ 13 │ UInt64 │ + ├────────┼────────────────────┤ + 15. │ 14 │ UInt64 │ + ├────────┼────────────────────┤ + 16. │ 15 │ UInt64 │ + ├────────┼────────────────────┤ + 17. │ 16 │ UInt64 │ + ├────────┼────────────────────┤ + 18. │ 17 │ UInt64 │ + ├────────┼────────────────────┤ + 19. │ 18 │ UInt64 │ + ├────────┼────────────────────┤ + 20. │ 19 │ UInt64 │ + ├────────┼────────────────────┤ + 21. │ 20 │ UInt64 │ + ├────────┼────────────────────┤ + 22. │ 21 │ UInt64 │ + ├────────┼────────────────────┤ + 23. │ 22 │ UInt64 │ + ├────────┼────────────────────┤ + 24. │ 23 │ UInt64 │ + ├────────┼────────────────────┤ + 25. │ 24 │ UInt64 │ + ├────────┼────────────────────┤ + 26. │ 25 │ UInt64 │ + ├────────┼────────────────────┤ + 27. │ 26 │ UInt64 │ + ├────────┼────────────────────┤ + 28. │ 27 │ UInt64 │ + ├────────┼────────────────────┤ + 29. │ 28 │ UInt64 │ + ├────────┼────────────────────┤ + 30. │ 29 │ UInt64 │ + ├────────┼────────────────────┤ + 31. │ 30 │ UInt64 │ + ├────────┼────────────────────┤ + 32. │ 31 │ UInt64 │ + ├────────┼────────────────────┤ + 33. │ 32 │ UInt64 │ + ├────────┼────────────────────┤ + 34. │ 33 │ UInt64 │ + ├────────┼────────────────────┤ + 35. │ 34 │ UInt64 │ + ├────────┼────────────────────┤ + 36. │ 35 │ UInt64 │ + ├────────┼────────────────────┤ + 37. │ 36 │ UInt64 │ + ├────────┼────────────────────┤ + 38. │ 37 │ UInt64 │ + ├────────┼────────────────────┤ + 39. │ 38 │ UInt64 │ + ├────────┼────────────────────┤ + 40. │ 39 │ UInt64 │ + ├────────┼────────────────────┤ + 41. │ 40 │ UInt64 │ + ├────────┼────────────────────┤ + 42. 
│ 41 │ UInt64 │ + ├────────┼────────────────────┤ + 43. │ 42 │ UInt64 │ + ├────────┼────────────────────┤ + 44. │ 43 │ UInt64 │ + ├────────┼────────────────────┤ + 45. │ 44 │ UInt64 │ + ├────────┼────────────────────┤ + 46. │ 45 │ UInt64 │ + ├────────┼────────────────────┤ + 47. │ 46 │ UInt64 │ + ├────────┼────────────────────┤ + 48. │ 47 │ UInt64 │ + ├────────┼────────────────────┤ + 49. │ 48 │ UInt64 │ + ├────────┼────────────────────┤ + 50. │ 49 │ UInt64 │ + ├────────┼────────────────────┤ + 51. │ 50 │ UInt64 │ + ├────────┼────────────────────┤ + 52. │ 51 │ UInt64 │ + ├────────┼────────────────────┤ + 53. │ 52 │ UInt64 │ + ├────────┼────────────────────┤ + 54. │ 53 │ UInt64 │ + ├────────┼────────────────────┤ + 55. │ 54 │ UInt64 │ + ├────────┼────────────────────┤ + 56. │ 55 │ UInt64 │ + ├────────┼────────────────────┤ + 57. │ 56 │ UInt64 │ + ├────────┼────────────────────┤ + 58. │ 57 │ UInt64 │ + ├────────┼────────────────────┤ + 59. │ 58 │ UInt64 │ + ├────────┼────────────────────┤ + 60. │ 59 │ UInt64 │ + ├────────┼────────────────────┤ + 61. │ 60 │ UInt64 │ + ├────────┼────────────────────┤ + 62. │ 61 │ UInt64 │ + ├────────┼────────────────────┤ + 63. │ 62 │ UInt64 │ + ├────────┼────────────────────┤ + 64. │ 63 │ UInt64 │ + ├────────┼────────────────────┤ + 65. │ 64 │ UInt64 │ + ├────────┼────────────────────┤ + 66. │ 65 │ UInt64 │ + ├────────┼────────────────────┤ + 67. │ 66 │ UInt64 │ + ├────────┼────────────────────┤ + 68. │ 67 │ UInt64 │ + ├────────┼────────────────────┤ + 69. │ 68 │ UInt64 │ + ├────────┼────────────────────┤ + 70. │ 69 │ UInt64 │ + ├────────┼────────────────────┤ + 71. │ 70 │ UInt64 │ + ├────────┼────────────────────┤ + 72. │ 71 │ UInt64 │ + ├────────┼────────────────────┤ + 73. │ 72 │ UInt64 │ + ├────────┼────────────────────┤ + 74. │ 73 │ UInt64 │ + ├────────┼────────────────────┤ + 75. │ 74 │ UInt64 │ + ├────────┼────────────────────┤ + 76. │ 75 │ UInt64 │ + ├────────┼────────────────────┤ + 77. 
│ 76 │ UInt64 │ + ├────────┼────────────────────┤ + 78. │ 77 │ UInt64 │ + ├────────┼────────────────────┤ + 79. │ 78 │ UInt64 │ + ├────────┼────────────────────┤ + 80. │ 79 │ UInt64 │ + ├────────┼────────────────────┤ + 81. │ 80 │ UInt64 │ + ├────────┼────────────────────┤ + 82. │ 81 │ UInt64 │ + ├────────┼────────────────────┤ + 83. │ 82 │ UInt64 │ + ├────────┼────────────────────┤ + 84. │ 83 │ UInt64 │ + ├────────┼────────────────────┤ + 85. │ 84 │ UInt64 │ + ├────────┼────────────────────┤ + 86. │ 85 │ UInt64 │ + ├────────┼────────────────────┤ + 87. │ 86 │ UInt64 │ + ├────────┼────────────────────┤ + 88. │ 87 │ UInt64 │ + ├────────┼────────────────────┤ + 89. │ 88 │ UInt64 │ + ├────────┼────────────────────┤ + 90. │ 89 │ UInt64 │ + ├────────┼────────────────────┤ + 91. │ 90 │ UInt64 │ + ├────────┼────────────────────┤ + 92. │ 91 │ UInt64 │ + ├────────┼────────────────────┤ + 93. │ 92 │ UInt64 │ + ├────────┼────────────────────┤ + 94. │ 93 │ UInt64 │ + ├────────┼────────────────────┤ + 95. │ 94 │ UInt64 │ + ├────────┼────────────────────┤ + 96. │ 95 │ UInt64 │ + ├────────┼────────────────────┤ + 97. │ 96 │ UInt64 │ + ├────────┼────────────────────┤ + 98. │ 97 │ UInt64 │ + ├────────┼────────────────────┤ + 99. │ 98 │ UInt64 │ + ├────────┼────────────────────┤ +100. │ 99 │ UInt64 │ + ┣━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━┫ + ┃ number ┃ toTypeName(number) ┃ + ┗━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━┛ + ┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ + ┃ number ┃ toTypeName(number) ┃ + ┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩ + 1. │ 0 │ UInt64 │ + ├────────┼────────────────────┤ + 2. │ 1 │ UInt64 │ + ├────────┼────────────────────┤ + 3. │ 2 │ UInt64 │ + ├────────┼────────────────────┤ + 4. │ 3 │ UInt64 │ + ├────────┼────────────────────┤ + 5. │ 4 │ UInt64 │ + ├────────┼────────────────────┤ + 6. │ 5 │ UInt64 │ + ├────────┼────────────────────┤ + 7. │ 6 │ UInt64 │ + ├────────┼────────────────────┤ + 8. │ 7 │ UInt64 │ + ├────────┼────────────────────┤ + 9. │ 8 │ UInt64 │ + ├────────┼────────────────────┤ + 10. 
│ 9 │ UInt64 │ + ├────────┼────────────────────┤ + 11. │ 10 │ UInt64 │ + ├────────┼────────────────────┤ + 12. │ 11 │ UInt64 │ + ├────────┼────────────────────┤ + 13. │ 12 │ UInt64 │ + ├────────┼────────────────────┤ + 14. │ 13 │ UInt64 │ + ├────────┼────────────────────┤ + 15. │ 14 │ UInt64 │ + ├────────┼────────────────────┤ + 16. │ 15 │ UInt64 │ + ├────────┼────────────────────┤ + 17. │ 16 │ UInt64 │ + ├────────┼────────────────────┤ + 18. │ 17 │ UInt64 │ + ├────────┼────────────────────┤ + 19. │ 18 │ UInt64 │ + ├────────┼────────────────────┤ + 20. │ 19 │ UInt64 │ + ├────────┼────────────────────┤ + 21. │ 20 │ UInt64 │ + ├────────┼────────────────────┤ + 22. │ 21 │ UInt64 │ + ├────────┼────────────────────┤ + 23. │ 22 │ UInt64 │ + ├────────┼────────────────────┤ + 24. │ 23 │ UInt64 │ + ├────────┼────────────────────┤ + 25. │ 24 │ UInt64 │ + ├────────┼────────────────────┤ + 26. │ 25 │ UInt64 │ + ├────────┼────────────────────┤ + 27. │ 26 │ UInt64 │ + ├────────┼────────────────────┤ + 28. │ 27 │ UInt64 │ + ├────────┼────────────────────┤ + 29. │ 28 │ UInt64 │ + ├────────┼────────────────────┤ + 30. │ 29 │ UInt64 │ + ├────────┼────────────────────┤ + 31. │ 30 │ UInt64 │ + ├────────┼────────────────────┤ + 32. │ 31 │ UInt64 │ + ├────────┼────────────────────┤ + 33. │ 32 │ UInt64 │ + ├────────┼────────────────────┤ + 34. │ 33 │ UInt64 │ + ├────────┼────────────────────┤ + 35. │ 34 │ UInt64 │ + ├────────┼────────────────────┤ + 36. │ 35 │ UInt64 │ + ├────────┼────────────────────┤ + 37. │ 36 │ UInt64 │ + ├────────┼────────────────────┤ + 38. │ 37 │ UInt64 │ + ├────────┼────────────────────┤ + 39. │ 38 │ UInt64 │ + ├────────┼────────────────────┤ + 40. │ 39 │ UInt64 │ + ├────────┼────────────────────┤ + 41. │ 40 │ UInt64 │ + ├────────┼────────────────────┤ + 42. │ 41 │ UInt64 │ + ├────────┼────────────────────┤ + 43. │ 42 │ UInt64 │ + ├────────┼────────────────────┤ + 44. │ 43 │ UInt64 │ + ├────────┼────────────────────┤ + 45. 
│ 44 │ UInt64 │ + ├────────┼────────────────────┤ + 46. │ 45 │ UInt64 │ + ├────────┼────────────────────┤ + 47. │ 46 │ UInt64 │ + ├────────┼────────────────────┤ + 48. │ 47 │ UInt64 │ + ├────────┼────────────────────┤ + 49. │ 48 │ UInt64 │ + ├────────┼────────────────────┤ + 50. │ 49 │ UInt64 │ + ├────────┼────────────────────┤ + 51. │ 50 │ UInt64 │ + ├────────┼────────────────────┤ + 52. │ 51 │ UInt64 │ + ├────────┼────────────────────┤ + 53. │ 52 │ UInt64 │ + ├────────┼────────────────────┤ + 54. │ 53 │ UInt64 │ + ├────────┼────────────────────┤ + 55. │ 54 │ UInt64 │ + ├────────┼────────────────────┤ + 56. │ 55 │ UInt64 │ + ├────────┼────────────────────┤ + 57. │ 56 │ UInt64 │ + ├────────┼────────────────────┤ + 58. │ 57 │ UInt64 │ + ├────────┼────────────────────┤ + 59. │ 58 │ UInt64 │ + ├────────┼────────────────────┤ + 60. │ 59 │ UInt64 │ + ├────────┼────────────────────┤ + 61. │ 60 │ UInt64 │ + ├────────┼────────────────────┤ + 62. │ 61 │ UInt64 │ + ├────────┼────────────────────┤ + 63. │ 62 │ UInt64 │ + ├────────┼────────────────────┤ + 64. │ 63 │ UInt64 │ + ├────────┼────────────────────┤ + 65. │ 64 │ UInt64 │ + ├────────┼────────────────────┤ + 66. │ 65 │ UInt64 │ + ├────────┼────────────────────┤ + 67. │ 66 │ UInt64 │ + ├────────┼────────────────────┤ + 68. │ 67 │ UInt64 │ + ├────────┼────────────────────┤ + 69. │ 68 │ UInt64 │ + ├────────┼────────────────────┤ + 70. │ 69 │ UInt64 │ + ├────────┼────────────────────┤ + 71. │ 70 │ UInt64 │ + ├────────┼────────────────────┤ + 72. │ 71 │ UInt64 │ + ├────────┼────────────────────┤ + 73. │ 72 │ UInt64 │ + ├────────┼────────────────────┤ + 74. │ 73 │ UInt64 │ + ├────────┼────────────────────┤ + 75. │ 74 │ UInt64 │ + ├────────┼────────────────────┤ + 76. │ 75 │ UInt64 │ + ├────────┼────────────────────┤ + 77. │ 76 │ UInt64 │ + ├────────┼────────────────────┤ + 78. │ 77 │ UInt64 │ + ├────────┼────────────────────┤ + 79. │ 78 │ UInt64 │ + ├────────┼────────────────────┤ + 80. 
│ 79 │ UInt64 │ + ├────────┼────────────────────┤ + 81. │ 80 │ UInt64 │ + ├────────┼────────────────────┤ + 82. │ 81 │ UInt64 │ + ├────────┼────────────────────┤ + 83. │ 82 │ UInt64 │ + ├────────┼────────────────────┤ + 84. │ 83 │ UInt64 │ + ├────────┼────────────────────┤ + 85. │ 84 │ UInt64 │ + ├────────┼────────────────────┤ + 86. │ 85 │ UInt64 │ + ├────────┼────────────────────┤ + 87. │ 86 │ UInt64 │ + ├────────┼────────────────────┤ + 88. │ 87 │ UInt64 │ + ├────────┼────────────────────┤ + 89. │ 88 │ UInt64 │ + ├────────┼────────────────────┤ + 90. │ 89 │ UInt64 │ + ├────────┼────────────────────┤ + 91. │ 90 │ UInt64 │ + ├────────┼────────────────────┤ + 92. │ 91 │ UInt64 │ + ├────────┼────────────────────┤ + 93. │ 92 │ UInt64 │ + ├────────┼────────────────────┤ + 94. │ 93 │ UInt64 │ + ├────────┼────────────────────┤ + 95. │ 94 │ UInt64 │ + ├────────┼────────────────────┤ + 96. │ 95 │ UInt64 │ + ├────────┼────────────────────┤ + 97. │ 96 │ UInt64 │ + ├────────┼────────────────────┤ + 98. │ 97 │ UInt64 │ + ├────────┼────────────────────┤ + 99. │ 98 │ UInt64 │ + ├────────┼────────────────────┤ +100. │ 99 │ UInt64 │ + ┣━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━┫ + ┃ number ┃ toTypeName(number) ┃ + ┗━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━┛ + ┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ + ┃ number ┃ toTypeName(number) ┃ + ┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩ + 1. │ 0 │ UInt64 │ + ├────────┼────────────────────┤ + 2. │ 1 │ UInt64 │ + ├────────┼────────────────────┤ + 3. │ 2 │ UInt64 │ + ├────────┼────────────────────┤ + 4. │ 3 │ UInt64 │ + ├────────┼────────────────────┤ + 5. │ 4 │ UInt64 │ + ├────────┼────────────────────┤ + 6. │ 5 │ UInt64 │ + ├────────┼────────────────────┤ + 7. │ 6 │ UInt64 │ + ├────────┼────────────────────┤ + 8. │ 7 │ UInt64 │ + ├────────┼────────────────────┤ + 9. │ 8 │ UInt64 │ + ├────────┼────────────────────┤ + 10. │ 9 │ UInt64 │ + ├────────┼────────────────────┤ + 11. │ 10 │ UInt64 │ + ├────────┼────────────────────┤ + 12. │ 11 │ UInt64 │ + ├────────┼────────────────────┤ + 13. 
│ 12 │ UInt64 │ + ├────────┼────────────────────┤ + 14. │ 13 │ UInt64 │ + ├────────┼────────────────────┤ + 15. │ 14 │ UInt64 │ + ├────────┼────────────────────┤ + 16. │ 15 │ UInt64 │ + ├────────┼────────────────────┤ + 17. │ 16 │ UInt64 │ + ├────────┼────────────────────┤ + 18. │ 17 │ UInt64 │ + ├────────┼────────────────────┤ + 19. │ 18 │ UInt64 │ + ├────────┼────────────────────┤ + 20. │ 19 │ UInt64 │ + ├────────┼────────────────────┤ + 21. │ 20 │ UInt64 │ + ├────────┼────────────────────┤ + 22. │ 21 │ UInt64 │ + ├────────┼────────────────────┤ + 23. │ 22 │ UInt64 │ + ├────────┼────────────────────┤ + 24. │ 23 │ UInt64 │ + ├────────┼────────────────────┤ + 25. │ 24 │ UInt64 │ + ├────────┼────────────────────┤ + 26. │ 25 │ UInt64 │ + ├────────┼────────────────────┤ + 27. │ 26 │ UInt64 │ + ├────────┼────────────────────┤ + 28. │ 27 │ UInt64 │ + ├────────┼────────────────────┤ + 29. │ 28 │ UInt64 │ + ├────────┼────────────────────┤ + 30. │ 29 │ UInt64 │ + ├────────┼────────────────────┤ + 31. │ 30 │ UInt64 │ + ├────────┼────────────────────┤ + 32. │ 31 │ UInt64 │ + ├────────┼────────────────────┤ + 33. │ 32 │ UInt64 │ + ├────────┼────────────────────┤ + 34. │ 33 │ UInt64 │ + ├────────┼────────────────────┤ + 35. │ 34 │ UInt64 │ + ├────────┼────────────────────┤ + 36. │ 35 │ UInt64 │ + ├────────┼────────────────────┤ + 37. │ 36 │ UInt64 │ + ├────────┼────────────────────┤ + 38. │ 37 │ UInt64 │ + ├────────┼────────────────────┤ + 39. │ 38 │ UInt64 │ + ├────────┼────────────────────┤ + 40. │ 39 │ UInt64 │ + ├────────┼────────────────────┤ + 41. │ 40 │ UInt64 │ + ├────────┼────────────────────┤ + 42. │ 41 │ UInt64 │ + ├────────┼────────────────────┤ + 43. │ 42 │ UInt64 │ + ├────────┼────────────────────┤ + 44. │ 43 │ UInt64 │ + ├────────┼────────────────────┤ + 45. │ 44 │ UInt64 │ + ├────────┼────────────────────┤ + 46. │ 45 │ UInt64 │ + ├────────┼────────────────────┤ + 47. │ 46 │ UInt64 │ + ├────────┼────────────────────┤ + 48. 
│ 47 │ UInt64 │ + ├────────┼────────────────────┤ + 49. │ 48 │ UInt64 │ + ├────────┼────────────────────┤ + 50. │ 49 │ UInt64 │ + ├────────┼────────────────────┤ + 51. │ 50 │ UInt64 │ + ├────────┼────────────────────┤ + 52. │ 51 │ UInt64 │ + ├────────┼────────────────────┤ + 53. │ 52 │ UInt64 │ + ├────────┼────────────────────┤ + 54. │ 53 │ UInt64 │ + ├────────┼────────────────────┤ + 55. │ 54 │ UInt64 │ + ├────────┼────────────────────┤ + 56. │ 55 │ UInt64 │ + ├────────┼────────────────────┤ + 57. │ 56 │ UInt64 │ + ├────────┼────────────────────┤ + 58. │ 57 │ UInt64 │ + ├────────┼────────────────────┤ + 59. │ 58 │ UInt64 │ + ├────────┼────────────────────┤ + 60. │ 59 │ UInt64 │ + ├────────┼────────────────────┤ + 61. │ 60 │ UInt64 │ + ├────────┼────────────────────┤ + 62. │ 61 │ UInt64 │ + ├────────┼────────────────────┤ + 63. │ 62 │ UInt64 │ + ├────────┼────────────────────┤ + 64. │ 63 │ UInt64 │ + ├────────┼────────────────────┤ + 65. │ 64 │ UInt64 │ + ├────────┼────────────────────┤ + 66. │ 65 │ UInt64 │ + ├────────┼────────────────────┤ + 67. │ 66 │ UInt64 │ + ├────────┼────────────────────┤ + 68. │ 67 │ UInt64 │ + ├────────┼────────────────────┤ + 69. │ 68 │ UInt64 │ + ├────────┼────────────────────┤ + 70. │ 69 │ UInt64 │ + ├────────┼────────────────────┤ + 71. │ 70 │ UInt64 │ + ├────────┼────────────────────┤ + 72. │ 71 │ UInt64 │ + ├────────┼────────────────────┤ + 73. │ 72 │ UInt64 │ + ├────────┼────────────────────┤ + 74. │ 73 │ UInt64 │ + ├────────┼────────────────────┤ + 75. │ 74 │ UInt64 │ + ├────────┼────────────────────┤ + 76. │ 75 │ UInt64 │ + ├────────┼────────────────────┤ + 77. │ 76 │ UInt64 │ + ├────────┼────────────────────┤ + 78. │ 77 │ UInt64 │ + ├────────┼────────────────────┤ + 79. │ 78 │ UInt64 │ + ├────────┼────────────────────┤ + 80. │ 79 │ UInt64 │ + ├────────┼────────────────────┤ + 81. │ 80 │ UInt64 │ + ├────────┼────────────────────┤ + 82. │ 81 │ UInt64 │ + ├────────┼────────────────────┤ + 83. 
│ 82 │ UInt64 │ + ├────────┼────────────────────┤ + 84. │ 83 │ UInt64 │ + ├────────┼────────────────────┤ + 85. │ 84 │ UInt64 │ + ├────────┼────────────────────┤ + 86. │ 85 │ UInt64 │ + ├────────┼────────────────────┤ + 87. │ 86 │ UInt64 │ + ├────────┼────────────────────┤ + 88. │ 87 │ UInt64 │ + ├────────┼────────────────────┤ + 89. │ 88 │ UInt64 │ + ├────────┼────────────────────┤ + 90. │ 89 │ UInt64 │ + ├────────┼────────────────────┤ + 91. │ 90 │ UInt64 │ + ├────────┼────────────────────┤ + 92. │ 91 │ UInt64 │ + ├────────┼────────────────────┤ + 93. │ 92 │ UInt64 │ + ├────────┼────────────────────┤ + 94. │ 93 │ UInt64 │ + ├────────┼────────────────────┤ + 95. │ 94 │ UInt64 │ + ├────────┼────────────────────┤ + 96. │ 95 │ UInt64 │ + ├────────┼────────────────────┤ + 97. │ 96 │ UInt64 │ + ├────────┼────────────────────┤ + 98. │ 97 │ UInt64 │ + ├────────┼────────────────────┤ + 99. │ 98 │ UInt64 │ + ├────────┼────────────────────┤ +100. │ 99 │ UInt64 │ + ┣━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━┫ + ┃ number ┃ toTypeName(number) ┃ + ┗━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━┛ + ┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ + ┃ number ┃ toTypeName(number) ┃ + ┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩ + 1. │ 0 │ UInt64 │ + ├────────┼────────────────────┤ + 2. │ 1 │ UInt64 │ + ├────────┼────────────────────┤ + 3. │ 2 │ UInt64 │ + ├────────┼────────────────────┤ + 4. │ 3 │ UInt64 │ + ├────────┼────────────────────┤ + 5. │ 4 │ UInt64 │ + ├────────┼────────────────────┤ + 6. │ 5 │ UInt64 │ + ├────────┼────────────────────┤ + 7. │ 6 │ UInt64 │ + ├────────┼────────────────────┤ + 8. │ 7 │ UInt64 │ + ├────────┼────────────────────┤ + 9. │ 8 │ UInt64 │ + ├────────┼────────────────────┤ + 10. │ 9 │ UInt64 │ + ├────────┼────────────────────┤ + 11. │ 10 │ UInt64 │ + ├────────┼────────────────────┤ + 12. │ 11 │ UInt64 │ + ├────────┼────────────────────┤ + 13. │ 12 │ UInt64 │ + ├────────┼────────────────────┤ + 14. │ 13 │ UInt64 │ + ├────────┼────────────────────┤ + 15. │ 14 │ UInt64 │ + ├────────┼────────────────────┤ + 16. 
│ 15 │ UInt64 │ + ├────────┼────────────────────┤ + 17. │ 16 │ UInt64 │ + ├────────┼────────────────────┤ + 18. │ 17 │ UInt64 │ + ├────────┼────────────────────┤ + 19. │ 18 │ UInt64 │ + ├────────┼────────────────────┤ + 20. │ 19 │ UInt64 │ + ├────────┼────────────────────┤ + 21. │ 20 │ UInt64 │ + ├────────┼────────────────────┤ + 22. │ 21 │ UInt64 │ + ├────────┼────────────────────┤ + 23. │ 22 │ UInt64 │ + ├────────┼────────────────────┤ + 24. │ 23 │ UInt64 │ + ├────────┼────────────────────┤ + 25. │ 24 │ UInt64 │ + ├────────┼────────────────────┤ + 26. │ 25 │ UInt64 │ + ├────────┼────────────────────┤ + 27. │ 26 │ UInt64 │ + ├────────┼────────────────────┤ + 28. │ 27 │ UInt64 │ + ├────────┼────────────────────┤ + 29. │ 28 │ UInt64 │ + ├────────┼────────────────────┤ + 30. │ 29 │ UInt64 │ + ├────────┼────────────────────┤ + 31. │ 30 │ UInt64 │ + ├────────┼────────────────────┤ + 32. │ 31 │ UInt64 │ + ├────────┼────────────────────┤ + 33. │ 32 │ UInt64 │ + ├────────┼────────────────────┤ + 34. │ 33 │ UInt64 │ + ├────────┼────────────────────┤ + 35. │ 34 │ UInt64 │ + ├────────┼────────────────────┤ + 36. │ 35 │ UInt64 │ + ├────────┼────────────────────┤ + 37. │ 36 │ UInt64 │ + ├────────┼────────────────────┤ + 38. │ 37 │ UInt64 │ + ├────────┼────────────────────┤ + 39. │ 38 │ UInt64 │ + ├────────┼────────────────────┤ + 40. │ 39 │ UInt64 │ + ├────────┼────────────────────┤ + 41. │ 40 │ UInt64 │ + ├────────┼────────────────────┤ + 42. │ 41 │ UInt64 │ + ├────────┼────────────────────┤ + 43. │ 42 │ UInt64 │ + ├────────┼────────────────────┤ + 44. │ 43 │ UInt64 │ + ├────────┼────────────────────┤ + 45. │ 44 │ UInt64 │ + ├────────┼────────────────────┤ + 46. │ 45 │ UInt64 │ + ├────────┼────────────────────┤ + 47. │ 46 │ UInt64 │ + ├────────┼────────────────────┤ + 48. │ 47 │ UInt64 │ + ├────────┼────────────────────┤ + 49. │ 48 │ UInt64 │ + ├────────┼────────────────────┤ + 50. │ 49 │ UInt64 │ + ├────────┼────────────────────┤ + 51. 
│ 50 │ UInt64 │ + ├────────┼────────────────────┤ + 52. │ 51 │ UInt64 │ + ├────────┼────────────────────┤ + 53. │ 52 │ UInt64 │ + ├────────┼────────────────────┤ + 54. │ 53 │ UInt64 │ + ├────────┼────────────────────┤ + 55. │ 54 │ UInt64 │ + ├────────┼────────────────────┤ + 56. │ 55 │ UInt64 │ + ├────────┼────────────────────┤ + 57. │ 56 │ UInt64 │ + ├────────┼────────────────────┤ + 58. │ 57 │ UInt64 │ + ├────────┼────────────────────┤ + 59. │ 58 │ UInt64 │ + ├────────┼────────────────────┤ + 60. │ 59 │ UInt64 │ + ├────────┼────────────────────┤ + 61. │ 60 │ UInt64 │ + ├────────┼────────────────────┤ + 62. │ 61 │ UInt64 │ + ├────────┼────────────────────┤ + 63. │ 62 │ UInt64 │ + ├────────┼────────────────────┤ + 64. │ 63 │ UInt64 │ + ├────────┼────────────────────┤ + 65. │ 64 │ UInt64 │ + ├────────┼────────────────────┤ + 66. │ 65 │ UInt64 │ + ├────────┼────────────────────┤ + 67. │ 66 │ UInt64 │ + ├────────┼────────────────────┤ + 68. │ 67 │ UInt64 │ + ├────────┼────────────────────┤ + 69. │ 68 │ UInt64 │ + ├────────┼────────────────────┤ + 70. │ 69 │ UInt64 │ + ├────────┼────────────────────┤ + 71. │ 70 │ UInt64 │ + ├────────┼────────────────────┤ + 72. │ 71 │ UInt64 │ + ├────────┼────────────────────┤ + 73. │ 72 │ UInt64 │ + ├────────┼────────────────────┤ + 74. │ 73 │ UInt64 │ + ├────────┼────────────────────┤ + 75. │ 74 │ UInt64 │ + ├────────┼────────────────────┤ + 76. │ 75 │ UInt64 │ + ├────────┼────────────────────┤ + 77. │ 76 │ UInt64 │ + ├────────┼────────────────────┤ + 78. │ 77 │ UInt64 │ + ├────────┼────────────────────┤ + 79. │ 78 │ UInt64 │ + ├────────┼────────────────────┤ + 80. │ 79 │ UInt64 │ + ├────────┼────────────────────┤ + 81. │ 80 │ UInt64 │ + ├────────┼────────────────────┤ + 82. │ 81 │ UInt64 │ + ├────────┼────────────────────┤ + 83. │ 82 │ UInt64 │ + ├────────┼────────────────────┤ + 84. │ 83 │ UInt64 │ + ├────────┼────────────────────┤ + 85. │ 84 │ UInt64 │ + ├────────┼────────────────────┤ + 86. 
│ 85 │ UInt64 │ + ├────────┼────────────────────┤ + 87. │ 86 │ UInt64 │ + ├────────┼────────────────────┤ + 88. │ 87 │ UInt64 │ + ├────────┼────────────────────┤ + 89. │ 88 │ UInt64 │ + ├────────┼────────────────────┤ + 90. │ 89 │ UInt64 │ + ├────────┼────────────────────┤ + 91. │ 90 │ UInt64 │ + ├────────┼────────────────────┤ + 92. │ 91 │ UInt64 │ + ├────────┼────────────────────┤ + 93. │ 92 │ UInt64 │ + ├────────┼────────────────────┤ + 94. │ 93 │ UInt64 │ + ├────────┼────────────────────┤ + 95. │ 94 │ UInt64 │ + ├────────┼────────────────────┤ + 96. │ 95 │ UInt64 │ + ├────────┼────────────────────┤ + 97. │ 96 │ UInt64 │ + ├────────┼────────────────────┤ + 98. │ 97 │ UInt64 │ + ├────────┼────────────────────┤ + 99. │ 98 │ UInt64 │ + ├────────┼────────────────────┤ +100. │ 99 │ UInt64 │ + ┣━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━┫ + ┃ number ┃ toTypeName(number) ┃ + ┗━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━┛ + ┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ + ┃ number ┃ toTypeName(number) ┃ + ┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩ + 1. │ 0 │ UInt64 │ + ├────────┼────────────────────┤ + 2. │ 1 │ UInt64 │ + ├────────┼────────────────────┤ + 3. │ 2 │ UInt64 │ + ├────────┼────────────────────┤ + 4. │ 3 │ UInt64 │ + ├────────┼────────────────────┤ + 5. │ 4 │ UInt64 │ + ├────────┼────────────────────┤ + 6. │ 5 │ UInt64 │ + ├────────┼────────────────────┤ + 7. │ 6 │ UInt64 │ + ├────────┼────────────────────┤ + 8. │ 7 │ UInt64 │ + ├────────┼────────────────────┤ + 9. │ 8 │ UInt64 │ + ├────────┼────────────────────┤ + 10. │ 9 │ UInt64 │ + ├────────┼────────────────────┤ + 11. │ 10 │ UInt64 │ + ├────────┼────────────────────┤ + 12. │ 11 │ UInt64 │ + ├────────┼────────────────────┤ + 13. │ 12 │ UInt64 │ + ├────────┼────────────────────┤ + 14. │ 13 │ UInt64 │ + ├────────┼────────────────────┤ + 15. │ 14 │ UInt64 │ + ├────────┼────────────────────┤ + 16. │ 15 │ UInt64 │ + ├────────┼────────────────────┤ + 17. │ 16 │ UInt64 │ + ├────────┼────────────────────┤ + 18. │ 17 │ UInt64 │ + ├────────┼────────────────────┤ + 19. 
│ 18 │ UInt64 │ + ├────────┼────────────────────┤ + 20. │ 19 │ UInt64 │ + ├────────┼────────────────────┤ + 21. │ 20 │ UInt64 │ + ├────────┼────────────────────┤ + 22. │ 21 │ UInt64 │ + ├────────┼────────────────────┤ + 23. │ 22 │ UInt64 │ + ├────────┼────────────────────┤ + 24. │ 23 │ UInt64 │ + ├────────┼────────────────────┤ + 25. │ 24 │ UInt64 │ + ├────────┼────────────────────┤ + 26. │ 25 │ UInt64 │ + ├────────┼────────────────────┤ + 27. │ 26 │ UInt64 │ + ├────────┼────────────────────┤ + 28. │ 27 │ UInt64 │ + ├────────┼────────────────────┤ + 29. │ 28 │ UInt64 │ + ├────────┼────────────────────┤ + 30. │ 29 │ UInt64 │ + ├────────┼────────────────────┤ + 31. │ 30 │ UInt64 │ + ├────────┼────────────────────┤ + 32. │ 31 │ UInt64 │ + ├────────┼────────────────────┤ + 33. │ 32 │ UInt64 │ + ├────────┼────────────────────┤ + 34. │ 33 │ UInt64 │ + ├────────┼────────────────────┤ + 35. │ 34 │ UInt64 │ + ├────────┼────────────────────┤ + 36. │ 35 │ UInt64 │ + ├────────┼────────────────────┤ + 37. │ 36 │ UInt64 │ + ├────────┼────────────────────┤ + 38. │ 37 │ UInt64 │ + ├────────┼────────────────────┤ + 39. │ 38 │ UInt64 │ + ├────────┼────────────────────┤ + 40. │ 39 │ UInt64 │ + ├────────┼────────────────────┤ + 41. │ 40 │ UInt64 │ + ├────────┼────────────────────┤ + 42. │ 41 │ UInt64 │ + ├────────┼────────────────────┤ + 43. │ 42 │ UInt64 │ + ├────────┼────────────────────┤ + 44. │ 43 │ UInt64 │ + ├────────┼────────────────────┤ + 45. │ 44 │ UInt64 │ + ├────────┼────────────────────┤ + 46. │ 45 │ UInt64 │ + ├────────┼────────────────────┤ + 47. │ 46 │ UInt64 │ + ├────────┼────────────────────┤ + 48. │ 47 │ UInt64 │ + ├────────┼────────────────────┤ + 49. │ 48 │ UInt64 │ + ├────────┼────────────────────┤ + 50. │ 49 │ UInt64 │ + ├────────┼────────────────────┤ + 51. │ 50 │ UInt64 │ + ├────────┼────────────────────┤ + 52. │ 51 │ UInt64 │ + ├────────┼────────────────────┤ + 53. │ 52 │ UInt64 │ + ├────────┼────────────────────┤ + 54. 
│ 53 │ UInt64 │ + ├────────┼────────────────────┤ + 55. │ 54 │ UInt64 │ + ├────────┼────────────────────┤ + 56. │ 55 │ UInt64 │ + ├────────┼────────────────────┤ + 57. │ 56 │ UInt64 │ + ├────────┼────────────────────┤ + 58. │ 57 │ UInt64 │ + ├────────┼────────────────────┤ + 59. │ 58 │ UInt64 │ + ├────────┼────────────────────┤ + 60. │ 59 │ UInt64 │ + ├────────┼────────────────────┤ + 61. │ 60 │ UInt64 │ + ├────────┼────────────────────┤ + 62. │ 61 │ UInt64 │ + ├────────┼────────────────────┤ + 63. │ 62 │ UInt64 │ + ├────────┼────────────────────┤ + 64. │ 63 │ UInt64 │ + ├────────┼────────────────────┤ + 65. │ 64 │ UInt64 │ + ├────────┼────────────────────┤ + 66. │ 65 │ UInt64 │ + ├────────┼────────────────────┤ + 67. │ 66 │ UInt64 │ + ├────────┼────────────────────┤ + 68. │ 67 │ UInt64 │ + ├────────┼────────────────────┤ + 69. │ 68 │ UInt64 │ + ├────────┼────────────────────┤ + 70. │ 69 │ UInt64 │ + ├────────┼────────────────────┤ + 71. │ 70 │ UInt64 │ + ├────────┼────────────────────┤ + 72. │ 71 │ UInt64 │ + ├────────┼────────────────────┤ + 73. │ 72 │ UInt64 │ + ├────────┼────────────────────┤ + 74. │ 73 │ UInt64 │ + ├────────┼────────────────────┤ + 75. │ 74 │ UInt64 │ + ├────────┼────────────────────┤ + 76. │ 75 │ UInt64 │ + ├────────┼────────────────────┤ + 77. │ 76 │ UInt64 │ + ├────────┼────────────────────┤ + 78. │ 77 │ UInt64 │ + ├────────┼────────────────────┤ + 79. │ 78 │ UInt64 │ + ├────────┼────────────────────┤ + 80. │ 79 │ UInt64 │ + ├────────┼────────────────────┤ + 81. │ 80 │ UInt64 │ + ├────────┼────────────────────┤ + 82. │ 81 │ UInt64 │ + ├────────┼────────────────────┤ + 83. │ 82 │ UInt64 │ + ├────────┼────────────────────┤ + 84. │ 83 │ UInt64 │ + ├────────┼────────────────────┤ + 85. │ 84 │ UInt64 │ + ├────────┼────────────────────┤ + 86. │ 85 │ UInt64 │ + ├────────┼────────────────────┤ + 87. │ 86 │ UInt64 │ + ├────────┼────────────────────┤ + 88. │ 87 │ UInt64 │ + ├────────┼────────────────────┤ + 89. 
│ 88 │ UInt64 │ + ├────────┼────────────────────┤ + 90. │ 89 │ UInt64 │ + ├────────┼────────────────────┤ + 91. │ 90 │ UInt64 │ + ├────────┼────────────────────┤ + 92. │ 91 │ UInt64 │ + ├────────┼────────────────────┤ + 93. │ 92 │ UInt64 │ + ├────────┼────────────────────┤ + 94. │ 93 │ UInt64 │ + ├────────┼────────────────────┤ + 95. │ 94 │ UInt64 │ + ├────────┼────────────────────┤ + 96. │ 95 │ UInt64 │ + ├────────┼────────────────────┤ + 97. │ 96 │ UInt64 │ + ├────────┼────────────────────┤ + 98. │ 97 │ UInt64 │ + ├────────┼────────────────────┤ + 99. │ 98 │ UInt64 │ + ├────────┼────────────────────┤ +100. │ 99 │ UInt64 │ + ┣━━━━━━━━╋━━━━━━━━━━━━━━━━━━━━┫ + ┃ number ┃ toTypeName(number) ┃ + ┗━━━━━━━━┻━━━━━━━━━━━━━━━━━━━━┛ + ┌─number─┬─toTypeName(number)─┐ + 1. │ 0 │ UInt64 │ + 2. │ 1 │ UInt64 │ + 3. │ 2 │ UInt64 │ + 4. │ 3 │ UInt64 │ + 5. │ 4 │ UInt64 │ + 6. │ 5 │ UInt64 │ + 7. │ 6 │ UInt64 │ + 8. │ 7 │ UInt64 │ + 9. │ 8 │ UInt64 │ + 10. │ 9 │ UInt64 │ + 11. │ 10 │ UInt64 │ + 12. │ 11 │ UInt64 │ + 13. │ 12 │ UInt64 │ + 14. │ 13 │ UInt64 │ + 15. │ 14 │ UInt64 │ + 16. │ 15 │ UInt64 │ + 17. │ 16 │ UInt64 │ + 18. │ 17 │ UInt64 │ + 19. │ 18 │ UInt64 │ + 20. │ 19 │ UInt64 │ + 21. │ 20 │ UInt64 │ + 22. │ 21 │ UInt64 │ + 23. │ 22 │ UInt64 │ + 24. │ 23 │ UInt64 │ + 25. │ 24 │ UInt64 │ + 26. │ 25 │ UInt64 │ + 27. │ 26 │ UInt64 │ + 28. │ 27 │ UInt64 │ + 29. │ 28 │ UInt64 │ + 30. │ 29 │ UInt64 │ + 31. │ 30 │ UInt64 │ + 32. │ 31 │ UInt64 │ + 33. │ 32 │ UInt64 │ + 34. │ 33 │ UInt64 │ + 35. │ 34 │ UInt64 │ + 36. │ 35 │ UInt64 │ + 37. │ 36 │ UInt64 │ + 38. │ 37 │ UInt64 │ + 39. │ 38 │ UInt64 │ + 40. │ 39 │ UInt64 │ + 41. │ 40 │ UInt64 │ + 42. │ 41 │ UInt64 │ + 43. │ 42 │ UInt64 │ + 44. │ 43 │ UInt64 │ + 45. │ 44 │ UInt64 │ + 46. │ 45 │ UInt64 │ + 47. │ 46 │ UInt64 │ + 48. │ 47 │ UInt64 │ + 49. │ 48 │ UInt64 │ + 50. │ 49 │ UInt64 │ + 51. │ 50 │ UInt64 │ + 52. │ 51 │ UInt64 │ + 53. │ 52 │ UInt64 │ + 54. │ 53 │ UInt64 │ + 55. │ 54 │ UInt64 │ + 56. │ 55 │ UInt64 │ + 57. 
│ 56 │ UInt64 │ + 58. │ 57 │ UInt64 │ + 59. │ 58 │ UInt64 │ + 60. │ 59 │ UInt64 │ + 61. │ 60 │ UInt64 │ + 62. │ 61 │ UInt64 │ + 63. │ 62 │ UInt64 │ + 64. │ 63 │ UInt64 │ + 65. │ 64 │ UInt64 │ + 66. │ 65 │ UInt64 │ + 67. │ 66 │ UInt64 │ + 68. │ 67 │ UInt64 │ + 69. │ 68 │ UInt64 │ + 70. │ 69 │ UInt64 │ + 71. │ 70 │ UInt64 │ + 72. │ 71 │ UInt64 │ + 73. │ 72 │ UInt64 │ + 74. │ 73 │ UInt64 │ + 75. │ 74 │ UInt64 │ + 76. │ 75 │ UInt64 │ + 77. │ 76 │ UInt64 │ + 78. │ 77 │ UInt64 │ + 79. │ 78 │ UInt64 │ + 80. │ 79 │ UInt64 │ + 81. │ 80 │ UInt64 │ + 82. │ 81 │ UInt64 │ + 83. │ 82 │ UInt64 │ + 84. │ 83 │ UInt64 │ + 85. │ 84 │ UInt64 │ + 86. │ 85 │ UInt64 │ + 87. │ 86 │ UInt64 │ + 88. │ 87 │ UInt64 │ + 89. │ 88 │ UInt64 │ + 90. │ 89 │ UInt64 │ + 91. │ 90 │ UInt64 │ + 92. │ 91 │ UInt64 │ + 93. │ 92 │ UInt64 │ + 94. │ 93 │ UInt64 │ + 95. │ 94 │ UInt64 │ + 96. │ 95 │ UInt64 │ + 97. │ 96 │ UInt64 │ + 98. │ 97 │ UInt64 │ + 99. │ 98 │ UInt64 │ +100. │ 99 │ UInt64 │ + └────────┴────────────────────┘ + ┌─number─┬─toTypeName(number)─┐ + 1. │ 0 │ UInt64 │ + 2. │ 1 │ UInt64 │ + 3. │ 2 │ UInt64 │ + 4. │ 3 │ UInt64 │ + 5. │ 4 │ UInt64 │ + 6. │ 5 │ UInt64 │ + 7. │ 6 │ UInt64 │ + 8. │ 7 │ UInt64 │ + 9. │ 8 │ UInt64 │ + 10. │ 9 │ UInt64 │ + 11. │ 10 │ UInt64 │ + 12. │ 11 │ UInt64 │ + 13. │ 12 │ UInt64 │ + 14. │ 13 │ UInt64 │ + 15. │ 14 │ UInt64 │ + 16. │ 15 │ UInt64 │ + 17. │ 16 │ UInt64 │ + 18. │ 17 │ UInt64 │ + 19. │ 18 │ UInt64 │ + 20. │ 19 │ UInt64 │ + 21. │ 20 │ UInt64 │ + 22. │ 21 │ UInt64 │ + 23. │ 22 │ UInt64 │ + 24. │ 23 │ UInt64 │ + 25. │ 24 │ UInt64 │ + 26. │ 25 │ UInt64 │ + 27. │ 26 │ UInt64 │ + 28. │ 27 │ UInt64 │ + 29. │ 28 │ UInt64 │ + 30. │ 29 │ UInt64 │ + 31. │ 30 │ UInt64 │ + 32. │ 31 │ UInt64 │ + 33. │ 32 │ UInt64 │ + 34. │ 33 │ UInt64 │ + 35. │ 34 │ UInt64 │ + 36. │ 35 │ UInt64 │ + 37. │ 36 │ UInt64 │ + 38. │ 37 │ UInt64 │ + 39. │ 38 │ UInt64 │ + 40. │ 39 │ UInt64 │ + 41. │ 40 │ UInt64 │ + 42. │ 41 │ UInt64 │ + 43. │ 42 │ UInt64 │ + 44. │ 43 │ UInt64 │ + 45. 
│ 44 │ UInt64 │ + 46. │ 45 │ UInt64 │ + 47. │ 46 │ UInt64 │ + 48. │ 47 │ UInt64 │ + 49. │ 48 │ UInt64 │ + 50. │ 49 │ UInt64 │ + 51. │ 50 │ UInt64 │ + 52. │ 51 │ UInt64 │ + 53. │ 52 │ UInt64 │ + 54. │ 53 │ UInt64 │ + 55. │ 54 │ UInt64 │ + 56. │ 55 │ UInt64 │ + 57. │ 56 │ UInt64 │ + 58. │ 57 │ UInt64 │ + 59. │ 58 │ UInt64 │ + 60. │ 59 │ UInt64 │ + 61. │ 60 │ UInt64 │ + 62. │ 61 │ UInt64 │ + 63. │ 62 │ UInt64 │ + 64. │ 63 │ UInt64 │ + 65. │ 64 │ UInt64 │ + 66. │ 65 │ UInt64 │ + 67. │ 66 │ UInt64 │ + 68. │ 67 │ UInt64 │ + 69. │ 68 │ UInt64 │ + 70. │ 69 │ UInt64 │ + 71. │ 70 │ UInt64 │ + 72. │ 71 │ UInt64 │ + 73. │ 72 │ UInt64 │ + 74. │ 73 │ UInt64 │ + 75. │ 74 │ UInt64 │ + 76. │ 75 │ UInt64 │ + 77. │ 76 │ UInt64 │ + 78. │ 77 │ UInt64 │ + 79. │ 78 │ UInt64 │ + 80. │ 79 │ UInt64 │ + 81. │ 80 │ UInt64 │ + 82. │ 81 │ UInt64 │ + 83. │ 82 │ UInt64 │ + 84. │ 83 │ UInt64 │ + 85. │ 84 │ UInt64 │ + 86. │ 85 │ UInt64 │ + 87. │ 86 │ UInt64 │ + 88. │ 87 │ UInt64 │ + 89. │ 88 │ UInt64 │ + 90. │ 89 │ UInt64 │ + 91. │ 90 │ UInt64 │ + 92. │ 91 │ UInt64 │ + 93. │ 92 │ UInt64 │ + 94. │ 93 │ UInt64 │ + 95. │ 94 │ UInt64 │ + 96. │ 95 │ UInt64 │ + 97. │ 96 │ UInt64 │ + 98. │ 97 │ UInt64 │ + 99. │ 98 │ UInt64 │ +100. │ 99 │ UInt64 │ + └─number─┴─toTypeName(number)─┘ + ┌─number─┬─toTypeName(number)─┐ + 1. │ 0 │ UInt64 │ + 2. │ 1 │ UInt64 │ + 3. │ 2 │ UInt64 │ + 4. │ 3 │ UInt64 │ + 5. │ 4 │ UInt64 │ + 6. │ 5 │ UInt64 │ + 7. │ 6 │ UInt64 │ + 8. │ 7 │ UInt64 │ + 9. │ 8 │ UInt64 │ + 10. │ 9 │ UInt64 │ + 11. │ 10 │ UInt64 │ + 12. │ 11 │ UInt64 │ + 13. │ 12 │ UInt64 │ + 14. │ 13 │ UInt64 │ + 15. │ 14 │ UInt64 │ + 16. │ 15 │ UInt64 │ + 17. │ 16 │ UInt64 │ + 18. │ 17 │ UInt64 │ + 19. │ 18 │ UInt64 │ + 20. │ 19 │ UInt64 │ + 21. │ 20 │ UInt64 │ + 22. │ 21 │ UInt64 │ + 23. │ 22 │ UInt64 │ + 24. │ 23 │ UInt64 │ + 25. │ 24 │ UInt64 │ + 26. │ 25 │ UInt64 │ + 27. │ 26 │ UInt64 │ + 28. │ 27 │ UInt64 │ + 29. │ 28 │ UInt64 │ + 30. │ 29 │ UInt64 │ + 31. │ 30 │ UInt64 │ + 32. │ 31 │ UInt64 │ + 33. 
│ 32 │ UInt64 │ + 34. │ 33 │ UInt64 │ + 35. │ 34 │ UInt64 │ + 36. │ 35 │ UInt64 │ + 37. │ 36 │ UInt64 │ + 38. │ 37 │ UInt64 │ + 39. │ 38 │ UInt64 │ + 40. │ 39 │ UInt64 │ + 41. │ 40 │ UInt64 │ + 42. │ 41 │ UInt64 │ + 43. │ 42 │ UInt64 │ + 44. │ 43 │ UInt64 │ + 45. │ 44 │ UInt64 │ + 46. │ 45 │ UInt64 │ + 47. │ 46 │ UInt64 │ + 48. │ 47 │ UInt64 │ + 49. │ 48 │ UInt64 │ + 50. │ 49 │ UInt64 │ + 51. │ 50 │ UInt64 │ + 52. │ 51 │ UInt64 │ + 53. │ 52 │ UInt64 │ + 54. │ 53 │ UInt64 │ + 55. │ 54 │ UInt64 │ + 56. │ 55 │ UInt64 │ + 57. │ 56 │ UInt64 │ + 58. │ 57 │ UInt64 │ + 59. │ 58 │ UInt64 │ + 60. │ 59 │ UInt64 │ + 61. │ 60 │ UInt64 │ + 62. │ 61 │ UInt64 │ + 63. │ 62 │ UInt64 │ + 64. │ 63 │ UInt64 │ + 65. │ 64 │ UInt64 │ + 66. │ 65 │ UInt64 │ + 67. │ 66 │ UInt64 │ + 68. │ 67 │ UInt64 │ + 69. │ 68 │ UInt64 │ + 70. │ 69 │ UInt64 │ + 71. │ 70 │ UInt64 │ + 72. │ 71 │ UInt64 │ + 73. │ 72 │ UInt64 │ + 74. │ 73 │ UInt64 │ + 75. │ 74 │ UInt64 │ + 76. │ 75 │ UInt64 │ + 77. │ 76 │ UInt64 │ + 78. │ 77 │ UInt64 │ + 79. │ 78 │ UInt64 │ + 80. │ 79 │ UInt64 │ + 81. │ 80 │ UInt64 │ + 82. │ 81 │ UInt64 │ + 83. │ 82 │ UInt64 │ + 84. │ 83 │ UInt64 │ + 85. │ 84 │ UInt64 │ + 86. │ 85 │ UInt64 │ + 87. │ 86 │ UInt64 │ + 88. │ 87 │ UInt64 │ + 89. │ 88 │ UInt64 │ + 90. │ 89 │ UInt64 │ + 91. │ 90 │ UInt64 │ + 92. │ 91 │ UInt64 │ + 93. │ 92 │ UInt64 │ + 94. │ 93 │ UInt64 │ + 95. │ 94 │ UInt64 │ + 96. │ 95 │ UInt64 │ + 97. │ 96 │ UInt64 │ + 98. │ 97 │ UInt64 │ + 99. │ 98 │ UInt64 │ +100. │ 99 │ UInt64 │ + └─number─┴─toTypeName(number)─┘ + ┌─number─┬─toTypeName(number)─┐ + 1. │ 0 │ UInt64 │ + 2. │ 1 │ UInt64 │ + 3. │ 2 │ UInt64 │ + 4. │ 3 │ UInt64 │ + 5. │ 4 │ UInt64 │ + 6. │ 5 │ UInt64 │ + 7. │ 6 │ UInt64 │ + 8. │ 7 │ UInt64 │ + 9. │ 8 │ UInt64 │ + 10. │ 9 │ UInt64 │ + 11. │ 10 │ UInt64 │ + 12. │ 11 │ UInt64 │ + 13. │ 12 │ UInt64 │ + 14. │ 13 │ UInt64 │ + 15. │ 14 │ UInt64 │ + 16. │ 15 │ UInt64 │ + 17. │ 16 │ UInt64 │ + 18. │ 17 │ UInt64 │ + 19. │ 18 │ UInt64 │ + 20. │ 19 │ UInt64 │ + 21. 
│ 20 │ UInt64 │ + 22. │ 21 │ UInt64 │ + 23. │ 22 │ UInt64 │ + 24. │ 23 │ UInt64 │ + 25. │ 24 │ UInt64 │ + 26. │ 25 │ UInt64 │ + 27. │ 26 │ UInt64 │ + 28. │ 27 │ UInt64 │ + 29. │ 28 │ UInt64 │ + 30. │ 29 │ UInt64 │ + 31. │ 30 │ UInt64 │ + 32. │ 31 │ UInt64 │ + 33. │ 32 │ UInt64 │ + 34. │ 33 │ UInt64 │ + 35. │ 34 │ UInt64 │ + 36. │ 35 │ UInt64 │ + 37. │ 36 │ UInt64 │ + 38. │ 37 │ UInt64 │ + 39. │ 38 │ UInt64 │ + 40. │ 39 │ UInt64 │ + 41. │ 40 │ UInt64 │ + 42. │ 41 │ UInt64 │ + 43. │ 42 │ UInt64 │ + 44. │ 43 │ UInt64 │ + 45. │ 44 │ UInt64 │ + 46. │ 45 │ UInt64 │ + 47. │ 46 │ UInt64 │ + 48. │ 47 │ UInt64 │ + 49. │ 48 │ UInt64 │ + 50. │ 49 │ UInt64 │ + 51. │ 50 │ UInt64 │ + 52. │ 51 │ UInt64 │ + 53. │ 52 │ UInt64 │ + 54. │ 53 │ UInt64 │ + 55. │ 54 │ UInt64 │ + 56. │ 55 │ UInt64 │ + 57. │ 56 │ UInt64 │ + 58. │ 57 │ UInt64 │ + 59. │ 58 │ UInt64 │ + 60. │ 59 │ UInt64 │ + 61. │ 60 │ UInt64 │ + 62. │ 61 │ UInt64 │ + 63. │ 62 │ UInt64 │ + 64. │ 63 │ UInt64 │ + 65. │ 64 │ UInt64 │ + 66. │ 65 │ UInt64 │ + 67. │ 66 │ UInt64 │ + 68. │ 67 │ UInt64 │ + 69. │ 68 │ UInt64 │ + 70. │ 69 │ UInt64 │ + 71. │ 70 │ UInt64 │ + 72. │ 71 │ UInt64 │ + 73. │ 72 │ UInt64 │ + 74. │ 73 │ UInt64 │ + 75. │ 74 │ UInt64 │ + 76. │ 75 │ UInt64 │ + 77. │ 76 │ UInt64 │ + 78. │ 77 │ UInt64 │ + 79. │ 78 │ UInt64 │ + 80. │ 79 │ UInt64 │ + 81. │ 80 │ UInt64 │ + 82. │ 81 │ UInt64 │ + 83. │ 82 │ UInt64 │ + 84. │ 83 │ UInt64 │ + 85. │ 84 │ UInt64 │ + 86. │ 85 │ UInt64 │ + 87. │ 86 │ UInt64 │ + 88. │ 87 │ UInt64 │ + 89. │ 88 │ UInt64 │ + 90. │ 89 │ UInt64 │ + 91. │ 90 │ UInt64 │ + 92. │ 91 │ UInt64 │ + 93. │ 92 │ UInt64 │ + 94. │ 93 │ UInt64 │ + 95. │ 94 │ UInt64 │ + 96. │ 95 │ UInt64 │ + 97. │ 96 │ UInt64 │ + 98. │ 97 │ UInt64 │ + 99. │ 98 │ UInt64 │ +100. │ 99 │ UInt64 │ + └─number─┴─toTypeName(number)─┘ + ┌─number─┬─toTypeName(number)─┐ + 1. │ 0 │ UInt64 │ + 2. │ 1 │ UInt64 │ + 3. │ 2 │ UInt64 │ + 4. │ 3 │ UInt64 │ + 5. │ 4 │ UInt64 │ + 6. │ 5 │ UInt64 │ + 7. │ 6 │ UInt64 │ + 8. │ 7 │ UInt64 │ + 9. 
│ 8 │ UInt64 │ + 10. │ 9 │ UInt64 │ + 11. │ 10 │ UInt64 │ + 12. │ 11 │ UInt64 │ + 13. │ 12 │ UInt64 │ + 14. │ 13 │ UInt64 │ + 15. │ 14 │ UInt64 │ + 16. │ 15 │ UInt64 │ + 17. │ 16 │ UInt64 │ + 18. │ 17 │ UInt64 │ + 19. │ 18 │ UInt64 │ + 20. │ 19 │ UInt64 │ + 21. │ 20 │ UInt64 │ + 22. │ 21 │ UInt64 │ + 23. │ 22 │ UInt64 │ + 24. │ 23 │ UInt64 │ + 25. │ 24 │ UInt64 │ + 26. │ 25 │ UInt64 │ + 27. │ 26 │ UInt64 │ + 28. │ 27 │ UInt64 │ + 29. │ 28 │ UInt64 │ + 30. │ 29 │ UInt64 │ + 31. │ 30 │ UInt64 │ + 32. │ 31 │ UInt64 │ + 33. │ 32 │ UInt64 │ + 34. │ 33 │ UInt64 │ + 35. │ 34 │ UInt64 │ + 36. │ 35 │ UInt64 │ + 37. │ 36 │ UInt64 │ + 38. │ 37 │ UInt64 │ + 39. │ 38 │ UInt64 │ + 40. │ 39 │ UInt64 │ + 41. │ 40 │ UInt64 │ + 42. │ 41 │ UInt64 │ + 43. │ 42 │ UInt64 │ + 44. │ 43 │ UInt64 │ + 45. │ 44 │ UInt64 │ + 46. │ 45 │ UInt64 │ + 47. │ 46 │ UInt64 │ + 48. │ 47 │ UInt64 │ + 49. │ 48 │ UInt64 │ + 50. │ 49 │ UInt64 │ + 51. │ 50 │ UInt64 │ + 52. │ 51 │ UInt64 │ + 53. │ 52 │ UInt64 │ + 54. │ 53 │ UInt64 │ + 55. │ 54 │ UInt64 │ + 56. │ 55 │ UInt64 │ + 57. │ 56 │ UInt64 │ + 58. │ 57 │ UInt64 │ + 59. │ 58 │ UInt64 │ + 60. │ 59 │ UInt64 │ + 61. │ 60 │ UInt64 │ + 62. │ 61 │ UInt64 │ + 63. │ 62 │ UInt64 │ + 64. │ 63 │ UInt64 │ + 65. │ 64 │ UInt64 │ + 66. │ 65 │ UInt64 │ + 67. │ 66 │ UInt64 │ + 68. │ 67 │ UInt64 │ + 69. │ 68 │ UInt64 │ + 70. │ 69 │ UInt64 │ + 71. │ 70 │ UInt64 │ + 72. │ 71 │ UInt64 │ + 73. │ 72 │ UInt64 │ + 74. │ 73 │ UInt64 │ + 75. │ 74 │ UInt64 │ + 76. │ 75 │ UInt64 │ + 77. │ 76 │ UInt64 │ + 78. │ 77 │ UInt64 │ + 79. │ 78 │ UInt64 │ + 80. │ 79 │ UInt64 │ + 81. │ 80 │ UInt64 │ + 82. │ 81 │ UInt64 │ + 83. │ 82 │ UInt64 │ + 84. │ 83 │ UInt64 │ + 85. │ 84 │ UInt64 │ + 86. │ 85 │ UInt64 │ + 87. │ 86 │ UInt64 │ + 88. │ 87 │ UInt64 │ + 89. │ 88 │ UInt64 │ + 90. │ 89 │ UInt64 │ + 91. │ 90 │ UInt64 │ + 92. │ 91 │ UInt64 │ + 93. │ 92 │ UInt64 │ + 94. │ 93 │ UInt64 │ + 95. │ 94 │ UInt64 │ + 96. │ 95 │ UInt64 │ + 97. │ 96 │ UInt64 │ + 98. │ 97 │ UInt64 │ + 99. │ 98 │ UInt64 │ +100. 
│ 99 │ UInt64 │ + └─number─┴─toTypeName(number)─┘ + number toTypeName(number) + + 1. 0 UInt64 + 2. 1 UInt64 + 3. 2 UInt64 + 4. 3 UInt64 + 5. 4 UInt64 + 6. 5 UInt64 + 7. 6 UInt64 + 8. 7 UInt64 + 9. 8 UInt64 + 10. 9 UInt64 + 11. 10 UInt64 + 12. 11 UInt64 + 13. 12 UInt64 + 14. 13 UInt64 + 15. 14 UInt64 + 16. 15 UInt64 + 17. 16 UInt64 + 18. 17 UInt64 + 19. 18 UInt64 + 20. 19 UInt64 + 21. 20 UInt64 + 22. 21 UInt64 + 23. 22 UInt64 + 24. 23 UInt64 + 25. 24 UInt64 + 26. 25 UInt64 + 27. 26 UInt64 + 28. 27 UInt64 + 29. 28 UInt64 + 30. 29 UInt64 + 31. 30 UInt64 + 32. 31 UInt64 + 33. 32 UInt64 + 34. 33 UInt64 + 35. 34 UInt64 + 36. 35 UInt64 + 37. 36 UInt64 + 38. 37 UInt64 + 39. 38 UInt64 + 40. 39 UInt64 + 41. 40 UInt64 + 42. 41 UInt64 + 43. 42 UInt64 + 44. 43 UInt64 + 45. 44 UInt64 + 46. 45 UInt64 + 47. 46 UInt64 + 48. 47 UInt64 + 49. 48 UInt64 + 50. 49 UInt64 + 51. 50 UInt64 + 52. 51 UInt64 + 53. 52 UInt64 + 54. 53 UInt64 + 55. 54 UInt64 + 56. 55 UInt64 + 57. 56 UInt64 + 58. 57 UInt64 + 59. 58 UInt64 + 60. 59 UInt64 + 61. 60 UInt64 + 62. 61 UInt64 + 63. 62 UInt64 + 64. 63 UInt64 + 65. 64 UInt64 + 66. 65 UInt64 + 67. 66 UInt64 + 68. 67 UInt64 + 69. 68 UInt64 + 70. 69 UInt64 + 71. 70 UInt64 + 72. 71 UInt64 + 73. 72 UInt64 + 74. 73 UInt64 + 75. 74 UInt64 + 76. 75 UInt64 + 77. 76 UInt64 + 78. 77 UInt64 + 79. 78 UInt64 + 80. 79 UInt64 + 81. 80 UInt64 + 82. 81 UInt64 + 83. 82 UInt64 + 84. 83 UInt64 + 85. 84 UInt64 + 86. 85 UInt64 + 87. 86 UInt64 + 88. 87 UInt64 + 89. 88 UInt64 + 90. 89 UInt64 + 91. 90 UInt64 + 92. 91 UInt64 + 93. 92 UInt64 + 94. 93 UInt64 + 95. 94 UInt64 + 96. 95 UInt64 + 97. 96 UInt64 + 98. 97 UInt64 + 99. 98 UInt64 +100. 99 UInt64 + number toTypeName(number) + + 1. 0 UInt64 + 2. 1 UInt64 + 3. 2 UInt64 + 4. 3 UInt64 + 5. 4 UInt64 + 6. 5 UInt64 + 7. 6 UInt64 + 8. 7 UInt64 + 9. 8 UInt64 + 10. 9 UInt64 + 11. 10 UInt64 + 12. 11 UInt64 + 13. 12 UInt64 + 14. 13 UInt64 + 15. 14 UInt64 + 16. 15 UInt64 + 17. 16 UInt64 + 18. 17 UInt64 + 19. 18 UInt64 + 20. 19 UInt64 + 21. 
20 UInt64 + 22. 21 UInt64 + 23. 22 UInt64 + 24. 23 UInt64 + 25. 24 UInt64 + 26. 25 UInt64 + 27. 26 UInt64 + 28. 27 UInt64 + 29. 28 UInt64 + 30. 29 UInt64 + 31. 30 UInt64 + 32. 31 UInt64 + 33. 32 UInt64 + 34. 33 UInt64 + 35. 34 UInt64 + 36. 35 UInt64 + 37. 36 UInt64 + 38. 37 UInt64 + 39. 38 UInt64 + 40. 39 UInt64 + 41. 40 UInt64 + 42. 41 UInt64 + 43. 42 UInt64 + 44. 43 UInt64 + 45. 44 UInt64 + 46. 45 UInt64 + 47. 46 UInt64 + 48. 47 UInt64 + 49. 48 UInt64 + 50. 49 UInt64 + 51. 50 UInt64 + 52. 51 UInt64 + 53. 52 UInt64 + 54. 53 UInt64 + 55. 54 UInt64 + 56. 55 UInt64 + 57. 56 UInt64 + 58. 57 UInt64 + 59. 58 UInt64 + 60. 59 UInt64 + 61. 60 UInt64 + 62. 61 UInt64 + 63. 62 UInt64 + 64. 63 UInt64 + 65. 64 UInt64 + 66. 65 UInt64 + 67. 66 UInt64 + 68. 67 UInt64 + 69. 68 UInt64 + 70. 69 UInt64 + 71. 70 UInt64 + 72. 71 UInt64 + 73. 72 UInt64 + 74. 73 UInt64 + 75. 74 UInt64 + 76. 75 UInt64 + 77. 76 UInt64 + 78. 77 UInt64 + 79. 78 UInt64 + 80. 79 UInt64 + 81. 80 UInt64 + 82. 81 UInt64 + 83. 82 UInt64 + 84. 83 UInt64 + 85. 84 UInt64 + 86. 85 UInt64 + 87. 86 UInt64 + 88. 87 UInt64 + 89. 88 UInt64 + 90. 89 UInt64 + 91. 90 UInt64 + 92. 91 UInt64 + 93. 92 UInt64 + 94. 93 UInt64 + 95. 94 UInt64 + 96. 95 UInt64 + 97. 96 UInt64 + 98. 97 UInt64 + 99. 98 UInt64 +100. 99 UInt64 + + number toTypeName(number) + number toTypeName(number) + + 1. 0 UInt64 + 2. 1 UInt64 + 3. 2 UInt64 + 4. 3 UInt64 + 5. 4 UInt64 + 6. 5 UInt64 + 7. 6 UInt64 + 8. 7 UInt64 + 9. 8 UInt64 + 10. 9 UInt64 + 11. 10 UInt64 + 12. 11 UInt64 + 13. 12 UInt64 + 14. 13 UInt64 + 15. 14 UInt64 + 16. 15 UInt64 + 17. 16 UInt64 + 18. 17 UInt64 + 19. 18 UInt64 + 20. 19 UInt64 + 21. 20 UInt64 + 22. 21 UInt64 + 23. 22 UInt64 + 24. 23 UInt64 + 25. 24 UInt64 + 26. 25 UInt64 + 27. 26 UInt64 + 28. 27 UInt64 + 29. 28 UInt64 + 30. 29 UInt64 + 31. 30 UInt64 + 32. 31 UInt64 + 33. 32 UInt64 + 34. 33 UInt64 + 35. 34 UInt64 + 36. 35 UInt64 + 37. 36 UInt64 + 38. 37 UInt64 + 39. 38 UInt64 + 40. 39 UInt64 + 41. 40 UInt64 + 42. 41 UInt64 + 43. 
42 UInt64 + 44. 43 UInt64 + 45. 44 UInt64 + 46. 45 UInt64 + 47. 46 UInt64 + 48. 47 UInt64 + 49. 48 UInt64 + 50. 49 UInt64 + 51. 50 UInt64 + 52. 51 UInt64 + 53. 52 UInt64 + 54. 53 UInt64 + 55. 54 UInt64 + 56. 55 UInt64 + 57. 56 UInt64 + 58. 57 UInt64 + 59. 58 UInt64 + 60. 59 UInt64 + 61. 60 UInt64 + 62. 61 UInt64 + 63. 62 UInt64 + 64. 63 UInt64 + 65. 64 UInt64 + 66. 65 UInt64 + 67. 66 UInt64 + 68. 67 UInt64 + 69. 68 UInt64 + 70. 69 UInt64 + 71. 70 UInt64 + 72. 71 UInt64 + 73. 72 UInt64 + 74. 73 UInt64 + 75. 74 UInt64 + 76. 75 UInt64 + 77. 76 UInt64 + 78. 77 UInt64 + 79. 78 UInt64 + 80. 79 UInt64 + 81. 80 UInt64 + 82. 81 UInt64 + 83. 82 UInt64 + 84. 83 UInt64 + 85. 84 UInt64 + 86. 85 UInt64 + 87. 86 UInt64 + 88. 87 UInt64 + 89. 88 UInt64 + 90. 89 UInt64 + 91. 90 UInt64 + 92. 91 UInt64 + 93. 92 UInt64 + 94. 93 UInt64 + 95. 94 UInt64 + 96. 95 UInt64 + 97. 96 UInt64 + 98. 97 UInt64 + 99. 98 UInt64 +100. 99 UInt64 + + number toTypeName(number) + number toTypeName(number) + + 1. 0 UInt64 + 2. 1 UInt64 + 3. 2 UInt64 + 4. 3 UInt64 + 5. 4 UInt64 + 6. 5 UInt64 + 7. 6 UInt64 + 8. 7 UInt64 + 9. 8 UInt64 + 10. 9 UInt64 + 11. 10 UInt64 + 12. 11 UInt64 + 13. 12 UInt64 + 14. 13 UInt64 + 15. 14 UInt64 + 16. 15 UInt64 + 17. 16 UInt64 + 18. 17 UInt64 + 19. 18 UInt64 + 20. 19 UInt64 + 21. 20 UInt64 + 22. 21 UInt64 + 23. 22 UInt64 + 24. 23 UInt64 + 25. 24 UInt64 + 26. 25 UInt64 + 27. 26 UInt64 + 28. 27 UInt64 + 29. 28 UInt64 + 30. 29 UInt64 + 31. 30 UInt64 + 32. 31 UInt64 + 33. 32 UInt64 + 34. 33 UInt64 + 35. 34 UInt64 + 36. 35 UInt64 + 37. 36 UInt64 + 38. 37 UInt64 + 39. 38 UInt64 + 40. 39 UInt64 + 41. 40 UInt64 + 42. 41 UInt64 + 43. 42 UInt64 + 44. 43 UInt64 + 45. 44 UInt64 + 46. 45 UInt64 + 47. 46 UInt64 + 48. 47 UInt64 + 49. 48 UInt64 + 50. 49 UInt64 + 51. 50 UInt64 + 52. 51 UInt64 + 53. 52 UInt64 + 54. 53 UInt64 + 55. 54 UInt64 + 56. 55 UInt64 + 57. 56 UInt64 + 58. 57 UInt64 + 59. 58 UInt64 + 60. 59 UInt64 + 61. 60 UInt64 + 62. 61 UInt64 + 63. 62 UInt64 + 64. 63 UInt64 + 65. 
64 UInt64 + 66. 65 UInt64 + 67. 66 UInt64 + 68. 67 UInt64 + 69. 68 UInt64 + 70. 69 UInt64 + 71. 70 UInt64 + 72. 71 UInt64 + 73. 72 UInt64 + 74. 73 UInt64 + 75. 74 UInt64 + 76. 75 UInt64 + 77. 76 UInt64 + 78. 77 UInt64 + 79. 78 UInt64 + 80. 79 UInt64 + 81. 80 UInt64 + 82. 81 UInt64 + 83. 82 UInt64 + 84. 83 UInt64 + 85. 84 UInt64 + 86. 85 UInt64 + 87. 86 UInt64 + 88. 87 UInt64 + 89. 88 UInt64 + 90. 89 UInt64 + 91. 90 UInt64 + 92. 91 UInt64 + 93. 92 UInt64 + 94. 93 UInt64 + 95. 94 UInt64 + 96. 95 UInt64 + 97. 96 UInt64 + 98. 97 UInt64 + 99. 98 UInt64 +100. 99 UInt64 + + number toTypeName(number) + number toTypeName(number) + + 1. 0 UInt64 + 2. 1 UInt64 + 3. 2 UInt64 + 4. 3 UInt64 + 5. 4 UInt64 + 6. 5 UInt64 + 7. 6 UInt64 + 8. 7 UInt64 + 9. 8 UInt64 + 10. 9 UInt64 + 11. 10 UInt64 + 12. 11 UInt64 + 13. 12 UInt64 + 14. 13 UInt64 + 15. 14 UInt64 + 16. 15 UInt64 + 17. 16 UInt64 + 18. 17 UInt64 + 19. 18 UInt64 + 20. 19 UInt64 + 21. 20 UInt64 + 22. 21 UInt64 + 23. 22 UInt64 + 24. 23 UInt64 + 25. 24 UInt64 + 26. 25 UInt64 + 27. 26 UInt64 + 28. 27 UInt64 + 29. 28 UInt64 + 30. 29 UInt64 + 31. 30 UInt64 + 32. 31 UInt64 + 33. 32 UInt64 + 34. 33 UInt64 + 35. 34 UInt64 + 36. 35 UInt64 + 37. 36 UInt64 + 38. 37 UInt64 + 39. 38 UInt64 + 40. 39 UInt64 + 41. 40 UInt64 + 42. 41 UInt64 + 43. 42 UInt64 + 44. 43 UInt64 + 45. 44 UInt64 + 46. 45 UInt64 + 47. 46 UInt64 + 48. 47 UInt64 + 49. 48 UInt64 + 50. 49 UInt64 + 51. 50 UInt64 + 52. 51 UInt64 + 53. 52 UInt64 + 54. 53 UInt64 + 55. 54 UInt64 + 56. 55 UInt64 + 57. 56 UInt64 + 58. 57 UInt64 + 59. 58 UInt64 + 60. 59 UInt64 + 61. 60 UInt64 + 62. 61 UInt64 + 63. 62 UInt64 + 64. 63 UInt64 + 65. 64 UInt64 + 66. 65 UInt64 + 67. 66 UInt64 + 68. 67 UInt64 + 69. 68 UInt64 + 70. 69 UInt64 + 71. 70 UInt64 + 72. 71 UInt64 + 73. 72 UInt64 + 74. 73 UInt64 + 75. 74 UInt64 + 76. 75 UInt64 + 77. 76 UInt64 + 78. 77 UInt64 + 79. 78 UInt64 + 80. 79 UInt64 + 81. 80 UInt64 + 82. 81 UInt64 + 83. 82 UInt64 + 84. 83 UInt64 + 85. 84 UInt64 + 86. 85 UInt64 + 87. 
86 UInt64 + 88. 87 UInt64 + 89. 88 UInt64 + 90. 89 UInt64 + 91. 90 UInt64 + 92. 91 UInt64 + 93. 92 UInt64 + 94. 93 UInt64 + 95. 94 UInt64 + 96. 95 UInt64 + 97. 96 UInt64 + 98. 97 UInt64 + 99. 98 UInt64 +100. 99 UInt64 + + number toTypeName(number) diff --git a/tests/queries/0_stateless/03169_display_column_names_in_footer.sql b/tests/queries/0_stateless/03169_display_column_names_in_footer.sql new file mode 100644 index 00000000000..9e4ec09c21c --- /dev/null +++ b/tests/queries/0_stateless/03169_display_column_names_in_footer.sql @@ -0,0 +1,19 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/65035 +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 49) FORMAT Pretty; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 10) FORMAT Pretty SETTINGS output_format_pretty_display_footer_column_names_min_rows=9; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT Pretty SETTINGS output_format_pretty_display_footer_column_names=0; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT Pretty; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT PrettyNoEscapes; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT PrettyMonoBlock; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT PrettyNoEscapesMonoBlock; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT PrettyNoEscapesMonoBlock; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT PrettyCompact SETTINGS output_format_pretty_display_footer_column_names=0; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT PrettyCompact; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT PrettyCompactNoEscapes; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT PrettyCompactMonoBlock; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 
100) FORMAT PrettyCompactNoEscapesMonoBlock; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT PrettySpace SETTINGS output_format_pretty_display_footer_column_names=0; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT PrettySpace; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT PrettySpaceNoEscapes; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT PrettySpaceMonoBlock; +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 100) FORMAT PrettySpaceNoEscapesMonoBlock; diff --git a/tests/queries/0_stateless/03169_modify_column_data_loss.reference b/tests/queries/0_stateless/03169_modify_column_data_loss.reference new file mode 100644 index 00000000000..2126a658c16 --- /dev/null +++ b/tests/queries/0_stateless/03169_modify_column_data_loss.reference @@ -0,0 +1,4 @@ +1 one 0 +2 two 0 +3 \N 0 +1 one 1 0 diff --git a/tests/queries/0_stateless/03169_modify_column_data_loss.sql b/tests/queries/0_stateless/03169_modify_column_data_loss.sql new file mode 100644 index 00000000000..def0a25a1b4 --- /dev/null +++ b/tests/queries/0_stateless/03169_modify_column_data_loss.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS column_modify_test; + +CREATE TABLE column_modify_test (id UInt64, val String, other_col UInt64) engine=MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part=0; +INSERT INTO column_modify_test VALUES (1,'one',0); +INSERT INTO column_modify_test VALUES (2,'two',0); + +-- on 21.9 that was done via mutations mechanism +ALTER TABLE column_modify_test MODIFY COLUMN val Nullable(String); + +INSERT INTO column_modify_test VALUES (3,Null,0); + +-- till now everythings looks ok +SELECT * FROM column_modify_test order by id, val, other_col; + +-- Now we do mutation. It will affect one of the parts, and will update columns.txt to the latest / correct state w/o updating the column file! 
+alter table column_modify_test update other_col=1 where id = 1 SETTINGS mutations_sync=1; + +-- row 1 is damaged now the column file & columns.txt is out of sync! +SELECT *, throwIf(val <> 'one') as issue FROM column_modify_test WHERE id = 1; diff --git a/tests/queries/0_stateless/03169_optimize_injective_functions_inside_uniq_crash.reference b/tests/queries/0_stateless/03169_optimize_injective_functions_inside_uniq_crash.reference new file mode 100644 index 00000000000..e58e9764b39 --- /dev/null +++ b/tests/queries/0_stateless/03169_optimize_injective_functions_inside_uniq_crash.reference @@ -0,0 +1,2 @@ +100 +100 diff --git a/tests/queries/0_stateless/03169_optimize_injective_functions_inside_uniq_crash.sql b/tests/queries/0_stateless/03169_optimize_injective_functions_inside_uniq_crash.sql new file mode 100644 index 00000000000..50d99b851a6 --- /dev/null +++ b/tests/queries/0_stateless/03169_optimize_injective_functions_inside_uniq_crash.sql @@ -0,0 +1,21 @@ +SELECT sum(u) +FROM +( + SELECT + intDiv(number, 4096) AS k, + uniqCombined(tuple(materialize(toLowCardinality(toNullable(16))))) AS u + FROM numbers(4096 * 100) + GROUP BY k +) +SETTINGS allow_experimental_analyzer = 1, optimize_injective_functions_inside_uniq=0; + +SELECT sum(u) +FROM +( + SELECT + intDiv(number, 4096) AS k, + uniqCombined(tuple(materialize(toLowCardinality(toNullable(16))))) AS u + FROM numbers(4096 * 100) + GROUP BY k +) +SETTINGS allow_experimental_analyzer = 1, optimize_injective_functions_inside_uniq=1; diff --git a/tests/queries/0_stateless/03169_time_virtual_column.reference b/tests/queries/0_stateless/03169_time_virtual_column.reference new file mode 100644 index 00000000000..4482956b706 --- /dev/null +++ b/tests/queries/0_stateless/03169_time_virtual_column.reference @@ -0,0 +1 @@ +4 1 diff --git a/tests/queries/0_stateless/03169_time_virtual_column.sh b/tests/queries/0_stateless/03169_time_virtual_column.sh new file mode 100755 index 00000000000..fef1de8c6f2 --- /dev/null +++ 
b/tests/queries/0_stateless/03169_time_virtual_column.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo "1,2" > $CLICKHOUSE_TEST_UNIQUE_NAME.csv +sleep 1 +$CLICKHOUSE_LOCAL -nm -q " +select _size, (dateDiff('millisecond', _time, now()) < 600000 AND dateDiff('millisecond', _time, now()) > 0) from file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv'); +" +rm $CLICKHOUSE_TEST_UNIQUE_NAME.csv diff --git a/tests/queries/0_stateless/03170_float_schema_inference_small_block.reference b/tests/queries/0_stateless/03170_float_schema_inference_small_block.reference new file mode 100644 index 00000000000..9ee16da8728 --- /dev/null +++ b/tests/queries/0_stateless/03170_float_schema_inference_small_block.reference @@ -0,0 +1,15 @@ +Int64 +x Nullable(Int64) +x Nullable(Int64) +x Nullable(Int64) +Float64 +x Nullable(Float64) +x Nullable(Float64) +x Nullable(Float64) +x Nullable(Float64) +Float64.explicit File +x Nullable(Float64) +Float64.pipe +x Nullable(Float64) +Float64.default max_read_buffer_size +x Nullable(Float64) diff --git a/tests/queries/0_stateless/03170_float_schema_inference_small_block.sh b/tests/queries/0_stateless/03170_float_schema_inference_small_block.sh new file mode 100755 index 00000000000..88f9bfad7ed --- /dev/null +++ b/tests/queries/0_stateless/03170_float_schema_inference_small_block.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# do not fallback to float always +echo "Int64" +$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : 1}' +$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : +1}' +$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : -1}' + +echo "Float64" +$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : 1.1}' +$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : +1.1}' +$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : 1.111}' +$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : +1.111}' + +# this is requried due to previously clickhouse-local does not interprets +# --max_read_buffer_size for fds [1] +# +# [1]: https://github.com/ClickHouse/ClickHouse/pull/64532 +echo "Float64.explicit File" +tmp_path=$(mktemp "$CUR_DIR/03170_float_schema_inference_small_block.json.XXXXXX") +trap 'rm -f $tmp_path' EXIT +cat > "$tmp_path" <<<'{"x" : 1.111}' +$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' --file "$tmp_path" + +echo "Float64.pipe" +echo '{"x" : 1.1}' | $CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' +echo "Float64.default max_read_buffer_size" +echo '{"x" : 1.1}' | $CLICKHOUSE_LOCAL --storage_file_read_method read --input-format JSONEachRow 'desc "table"' diff --git a/tests/queries/0_stateless/03170_part_offset_as_table_column.reference 
b/tests/queries/0_stateless/03170_part_offset_as_table_column.reference new file mode 100644 index 00000000000..435187cb39b --- /dev/null +++ b/tests/queries/0_stateless/03170_part_offset_as_table_column.reference @@ -0,0 +1,30 @@ +0 0 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +7 0 +8 0 +9 0 +0 0 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +7 0 +8 0 +9 0 +0 0 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +7 0 +8 0 +9 0 diff --git a/tests/queries/0_stateless/03170_part_offset_as_table_column.sql b/tests/queries/0_stateless/03170_part_offset_as_table_column.sql new file mode 100644 index 00000000000..36cbc156744 --- /dev/null +++ b/tests/queries/0_stateless/03170_part_offset_as_table_column.sql @@ -0,0 +1,25 @@ +CREATE TABLE test_table +( + `key` UInt32, + `_part_offset` DEFAULT 0 +) +ENGINE = MergeTree +ORDER BY key; + +INSERT INTO test_table (key) SELECT number +FROM numbers(10); + +set allow_experimental_analyzer=0; + +SELECT * +FROM test_table; + +set allow_experimental_analyzer=1; + +SELECT * +FROM test_table; + +SELECT + key, + _part_offset +FROM test_table; diff --git a/tests/queries/0_stateless/03171_condition_pushdown.reference b/tests/queries/0_stateless/03171_condition_pushdown.reference new file mode 100644 index 00000000000..c7e6479af3b --- /dev/null +++ b/tests/queries/0_stateless/03171_condition_pushdown.reference @@ -0,0 +1,2 @@ +123 123 +456 456 diff --git a/tests/queries/0_stateless/03171_condition_pushdown.sql b/tests/queries/0_stateless/03171_condition_pushdown.sql new file mode 100644 index 00000000000..9cfe41ce921 --- /dev/null +++ b/tests/queries/0_stateless/03171_condition_pushdown.sql @@ -0,0 +1,6 @@ +-- This query succeeds only if it is correctly optimized. 
+SET allow_experimental_analyzer = 1; +SELECT * FROM (SELECT * FROM numbers(1e19)) AS t1, (SELECT * FROM numbers(1e19)) AS t2 WHERE t1.number IN (123, 456) AND t2.number = t1.number ORDER BY ALL; + +-- Still TODO: +-- SELECT * FROM (SELECT * FROM numbers(1e19)) AS t1, (SELECT * FROM numbers(1e19)) AS t2 WHERE t1.number IN (SELECT 123 UNION ALL SELECT 456) AND t2.number = t1.number ORDER BY ALL; diff --git a/tests/queries/0_stateless/03171_direct_dict_short_circuit_bug.reference b/tests/queries/0_stateless/03171_direct_dict_short_circuit_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03171_direct_dict_short_circuit_bug.sql b/tests/queries/0_stateless/03171_direct_dict_short_circuit_bug.sql new file mode 100644 index 00000000000..034d48d2b8b --- /dev/null +++ b/tests/queries/0_stateless/03171_direct_dict_short_circuit_bug.sql @@ -0,0 +1,37 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/65201 +SET short_circuit_function_evaluation='enable'; + +DROP DICTIONARY IF EXISTS direct_dictionary_simple_key_simple_attributes; +DROP TABLE IF EXISTS simple_key_simple_attributes_source_table; + +CREATE TABLE simple_key_simple_attributes_source_table +( + id UInt64, + value_first String, + value_second String +) + ENGINE = TinyLog; + +INSERT INTO simple_key_simple_attributes_source_table VALUES(0, 'value_0', 'value_second_0'); +INSERT INTO simple_key_simple_attributes_source_table VALUES(1, 'value_1', 'value_second_1'); +INSERT INTO simple_key_simple_attributes_source_table VALUES(2, 'value_2', 'value_second_2'); + + +CREATE DICTIONARY direct_dictionary_simple_key_simple_attributes +( + `id` UInt64, + `value_first` String DEFAULT 'value_first_default', + `value_second` String DEFAULT 'value_second_default' +) + PRIMARY KEY id + SOURCE(CLICKHOUSE(TABLE 'simple_key_simple_attributes_source_table')) + LAYOUT(DIRECT()); + +SELECT + toUInt128(1), + dictGetOrDefault('direct_dictionary_simple_key_simple_attributes', 
'value_second', number, toString(toFixedString(toFixedString(toFixedString(materialize(toNullable('default')), 7), 7), toNullable(toNullable(toNullable(toUInt128(7))))))) AS value_second +FROM system.numbers LIMIT 255 +FORMAT Null; + +DROP DICTIONARY IF EXISTS direct_dictionary_simple_key_simple_attributes; +DROP TABLE IF EXISTS simple_key_simple_attributes_source_table; diff --git a/tests/queries/0_stateless/03171_hashed_dictionary_short_circuit_bug_fix.reference b/tests/queries/0_stateless/03171_hashed_dictionary_short_circuit_bug_fix.reference new file mode 100644 index 00000000000..a2ac115060f --- /dev/null +++ b/tests/queries/0_stateless/03171_hashed_dictionary_short_circuit_bug_fix.reference @@ -0,0 +1,6 @@ +100 1 1 +300 3 0 +200 2 2 +100 1 1 +300 3 0 +200 2 2 diff --git a/tests/queries/0_stateless/03171_hashed_dictionary_short_circuit_bug_fix.sql b/tests/queries/0_stateless/03171_hashed_dictionary_short_circuit_bug_fix.sql new file mode 100644 index 00000000000..e1b5531a442 --- /dev/null +++ b/tests/queries/0_stateless/03171_hashed_dictionary_short_circuit_bug_fix.sql @@ -0,0 +1,30 @@ +-- Tags: no-parallel + +CREATE TABLE x ( hash_id UInt64, user_result Decimal(3, 2) ) ENGINE = Memory(); + +CREATE TABLE y ( hash_id UInt64, user_result DECIMAL(18, 6) ) ENGINE = Memory(); + +INSERT INTO x values (100, 1), (200, 2); +INSERT INTO y values (100, 1), (300, 3), (200, 2); + +CREATE DICTIONARY d1 (hash_id UInt64, user_result Decimal(3, 2) ) +PRIMARY KEY hash_id +SOURCE(CLICKHOUSE(TABLE 'x')) +LIFETIME(0) +LAYOUT(HASHED()); + +SELECT hash_id, + dictGetOrDefault(d1, 'user_result', toUInt64(hash_id), toFloat64(user_result)), + dictGet(d1, 'user_result', toUInt64(hash_id)) +FROM y; + +CREATE DICTIONARY d2 (hash_id UInt64, user_result Decimal(3, 2) ) +PRIMARY KEY hash_id +SOURCE(CLICKHOUSE(TABLE 'x')) +LIFETIME(0) +LAYOUT(HASHED_ARRAY()); + +SELECT hash_id, + dictGetOrDefault(d2, 'user_result', toUInt64(hash_id), toFloat64(user_result)), + dictGet(d2, 'user_result', 
toUInt64(hash_id)) +FROM y; diff --git a/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.reference b/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.reference new file mode 100644 index 00000000000..6e8a5df9145 --- /dev/null +++ b/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.reference @@ -0,0 +1,9 @@ +121 +121 +32 +21 +10 +32 +22 +11 +1 diff --git a/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.sql b/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.sql new file mode 100644 index 00000000000..2d566e52c94 --- /dev/null +++ b/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.sql @@ -0,0 +1,35 @@ +DROP TABLE IF EXISTS test_hilbert_encode; + +CREATE TABLE test_hilbert_encode (x UInt32, y UInt32) ENGINE = MergeTree ORDER BY hilbertEncode(x, y) SETTINGS index_granularity = 8192, index_granularity_bytes = '1Mi'; +INSERT INTO test_hilbert_encode SELECT number DIV 1024, number % 1024 FROM numbers(1048576); + +SET max_rows_to_read = 8192, force_primary_key = 1, analyze_index_with_space_filling_curves = 1; +SELECT count() FROM test_hilbert_encode WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30; + +SET max_rows_to_read = 8192, force_primary_key = 1, analyze_index_with_space_filling_curves = 0; +SELECT count() FROM test_hilbert_encode WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30; -- { serverError 277 } + +DROP TABLE test_hilbert_encode; + +-- The same, but with more precise index + +CREATE TABLE test_hilbert_encode (x UInt32, y UInt32) ENGINE = MergeTree ORDER BY hilbertEncode(x, y) SETTINGS index_granularity = 1; +SET max_rows_to_read = 0; +INSERT INTO test_hilbert_encode SELECT number DIV 32, number % 32 FROM numbers(1024); + +SET max_rows_to_read = 200, force_primary_key = 1, analyze_index_with_space_filling_curves = 1; +SELECT count() FROM test_hilbert_encode WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30; + +-- Various other conditions + +SELECT count() FROM test_hilbert_encode WHERE x = 
10 SETTINGS max_rows_to_read = 49; +SELECT count() FROM test_hilbert_encode WHERE x = 10 AND y > 10 SETTINGS max_rows_to_read = 33; +SELECT count() FROM test_hilbert_encode WHERE x = 10 AND y < 10 SETTINGS max_rows_to_read = 15; + +SELECT count() FROM test_hilbert_encode WHERE y = 10 SETTINGS max_rows_to_read = 50; +SELECT count() FROM test_hilbert_encode WHERE x >= 10 AND y = 10 SETTINGS max_rows_to_read = 35; +SELECT count() FROM test_hilbert_encode WHERE y = 10 AND x <= 10 SETTINGS max_rows_to_read = 17; + +SELECT count() FROM test_hilbert_encode PREWHERE x >= 10 WHERE x < 11 AND y = 10 SETTINGS max_rows_to_read = 2; + +DROP TABLE test_hilbert_encode; diff --git a/tests/queries/0_stateless/03172_bcrypt_validation.reference b/tests/queries/0_stateless/03172_bcrypt_validation.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03172_bcrypt_validation.sql b/tests/queries/0_stateless/03172_bcrypt_validation.sql new file mode 100644 index 00000000000..37dd0c9bb5d --- /dev/null +++ b/tests/queries/0_stateless/03172_bcrypt_validation.sql @@ -0,0 +1,3 @@ +-- Tags: no-fasttest +DROP USER IF EXISTS 03172_user_invalid_bcrypt_hash; +CREATE USER 03172_user_invalid_bcrypt_hash IDENTIFIED WITH bcrypt_hash BY '012345678901234567890123456789012345678901234567890123456789'; -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/03172_error_log_table_not_empty.reference b/tests/queries/0_stateless/03172_error_log_table_not_empty.reference new file mode 100644 index 00000000000..a9e2f17562a --- /dev/null +++ b/tests/queries/0_stateless/03172_error_log_table_not_empty.reference @@ -0,0 +1,6 @@ +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh new file mode 100755 index 00000000000..8d74ebe1039 --- /dev/null +++ b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + 
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Get the previous number of errors for 111, 222 and 333 +errors_111=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 111") +errors_222=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 222") +errors_333=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 333") + +# Throw three random errors: 111, 222 and 333 and wait for more than collect_interval_milliseconds to ensure system.error_log is flushed +$CLICKHOUSE_CLIENT -mn -q " +SELECT throwIf(true, 'error_log', toInt16(111)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 111 } +SELECT throwIf(true, 'error_log', toInt16(222)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 222 } +SELECT throwIf(true, 'error_log', toInt16(333)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 333 } +SELECT sleep(2) format NULL; +SYSTEM FLUSH LOGS; +" + +# Check that the three random errors are propagated +$CLICKHOUSE_CLIENT -mn -q " +SELECT sum(value) > $errors_111 FROM system.error_log WHERE code = 111; +SELECT sum(value) > $errors_222 FROM system.error_log WHERE code = 222; +SELECT sum(value) > $errors_333 FROM system.error_log WHERE code = 333; +" + +# Ensure that if we throw them again, they're still propagated +$CLICKHOUSE_CLIENT -mn -q " +SELECT throwIf(true, 'error_log', toInt16(111)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 111 } +SELECT throwIf(true, 'error_log', toInt16(222)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 222 } +SELECT throwIf(true, 'error_log', toInt16(333)) SETTINGS allow_custom_error_code_in_throwif=1; -- { serverError 333 } +SELECT sleep(2) format NULL; +SYSTEM FLUSH LOGS; +" + +$CLICKHOUSE_CLIENT -mn -q " +SELECT sum(value) > $(($errors_111+1)) FROM system.error_log WHERE code = 111; +SELECT sum(value) > 
$(($errors_222+1)) FROM system.error_log WHERE code = 222; +SELECT sum(value) > $(($errors_333+1)) FROM system.error_log WHERE code = 333; +" \ No newline at end of file diff --git a/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.reference b/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql b/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql new file mode 100644 index 00000000000..0cadd4f5cee --- /dev/null +++ b/tests/queries/0_stateless/03173_check_cyclic_dependencies_on_create_and_rename.sql @@ -0,0 +1,77 @@ +-- Tags: atomic-database + +DROP TABLE IF EXISTS test; +CREATE TABLE test (id UInt64, value String) ENGINE=MergeTree ORDER BY id; +INSERT INTO test SELECT number, 'str_' || toString(number) FROM numbers(10); +DROP DICTIONARY IF EXISTS test_dict; +CREATE DICTIONARY test_dict +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE test)) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); +DROP TABLE IF EXISTS view_source; +CREATE TABLE view_source (id UInt64) ENGINE=MergeTree ORDER BY id; +INSERT INTO view_source SELECT * FROM numbers(5); +DROP VIEW IF EXISTS view; +CREATE VIEW view AS SELECT id, dictGet('test_dict', 'value', id) as value FROM view_source; + +CREATE OR REPLACE DICTIONARY test_dict +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE view)) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); -- {serverError INFINITE_LOOP} + +REPLACE DICTIONARY test_dict +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE view)) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); -- {serverError INFINITE_LOOP} + + +DROP DICTIONARY IF EXISTS test_dict_2; +CREATE DICTIONARY test_dict_2 +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE view)) 
+LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); + +EXCHANGE DICTIONARIES test_dict AND test_dict_2; -- {serverError INFINITE_LOOP} + +DROP DICTIONARY test_dict_2; + +CREATE OR REPLACE DICTIONARY test_dict_2 +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE view)) +LAYOUT(FLAT()) +LIFETIME(MIN 0 MAX 1000); + +EXCHANGE DICTIONARIES test_dict AND test_dict_2; -- {serverError INFINITE_LOOP} + +DROP DICTIONARY test_dict; +RENAME DICTIONARY test_dict_2 to test_dict; -- {serverError INFINITE_LOOP} + +DROP DICTIONARY test_dict_2; +DROP VIEW view; +DROP TABLE test; +DROP TABLE view_source; + diff --git a/tests/queries/0_stateless/03173_distinct_combinator_alignment.reference b/tests/queries/0_stateless/03173_distinct_combinator_alignment.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03173_distinct_combinator_alignment.sql b/tests/queries/0_stateless/03173_distinct_combinator_alignment.sql new file mode 100644 index 00000000000..4a066be5086 --- /dev/null +++ b/tests/queries/0_stateless/03173_distinct_combinator_alignment.sql @@ -0,0 +1 @@ +SELECT toTypeName(topKDistinctState(toNullable(10))(toString(number)) IGNORE NULLS) FROM numbers(100) GROUP BY tuple((map((materialize(toNullable(1)), 2), 4, (3, 4), 5), 3)), map((1, 2), 4, (3, 4), toNullable(5)) WITH CUBE WITH TOTALS FORMAT Null diff --git a/tests/queries/0_stateless/03173_forbid_qualify.reference b/tests/queries/0_stateless/03173_forbid_qualify.reference new file mode 100644 index 00000000000..c2f595d8c4b --- /dev/null +++ b/tests/queries/0_stateless/03173_forbid_qualify.reference @@ -0,0 +1,3 @@ +100 +49 +100 diff --git a/tests/queries/0_stateless/03173_forbid_qualify.sql b/tests/queries/0_stateless/03173_forbid_qualify.sql new file mode 100644 index 00000000000..d8cb2bad2ea --- /dev/null +++ b/tests/queries/0_stateless/03173_forbid_qualify.sql @@ -0,0 +1,11 @@ +drop table if exists test_qualify; +create table test_qualify (number Int64) ENGINE = 
MergeTree ORDER BY (number); + +insert into test_qualify SELECT * FROM numbers(100); + +select count() from test_qualify; -- 100 +select * from test_qualify qualify row_number() over (order by number) = 50 SETTINGS allow_experimental_analyzer = 1; -- 49 +select * from test_qualify qualify row_number() over (order by number) = 50 SETTINGS allow_experimental_analyzer = 0; -- { serverError NOT_IMPLEMENTED } + +delete from test_qualify where number in (select number from test_qualify qualify row_number() over (order by number) = 50); -- { serverError UNFINISHED } +select count() from test_qualify; -- 100 diff --git a/tests/queries/0_stateless/03174_merge_join_bug.reference b/tests/queries/0_stateless/03174_merge_join_bug.reference new file mode 100644 index 00000000000..af98bcd6397 --- /dev/null +++ b/tests/queries/0_stateless/03174_merge_join_bug.reference @@ -0,0 +1,10 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 diff --git a/tests/queries/0_stateless/03174_merge_join_bug.sql b/tests/queries/0_stateless/03174_merge_join_bug.sql new file mode 100644 index 00000000000..ab4cb6cd4a9 --- /dev/null +++ b/tests/queries/0_stateless/03174_merge_join_bug.sql @@ -0,0 +1,10 @@ +-- Tags: no-random-settings + +SET allow_experimental_analyzer=1, join_algorithm = 'full_sorting_merge'; +CREATE TABLE xxxx_yyy (key UInt32, key_b ALIAS key) ENGINE=MergeTree() ORDER BY key SETTINGS ratio_of_defaults_for_sparse_serialization=0.0; +INSERT INTO xxxx_yyy SELECT number FROM numbers(10); + +SELECT * +FROM xxxx_yyy AS a +INNER JOIN xxxx_yyy AS b ON a.key = b.key_b +ORDER BY a.key; diff --git a/tests/queries/0_stateless/03174_split_parts_ranges_into_intersecting_and_non_intersecting_final_and_read-in-order_bug.reference b/tests/queries/0_stateless/03174_split_parts_ranges_into_intersecting_and_non_intersecting_final_and_read-in-order_bug.reference new file mode 100644 index 00000000000..9c849a44af7 --- /dev/null +++ 
b/tests/queries/0_stateless/03174_split_parts_ranges_into_intersecting_and_non_intersecting_final_and_read-in-order_bug.reference @@ -0,0 +1,116 @@ +2000-01-01 00:00:00 3732436800 3732436800 0 +2000-01-02 00:00:00 11197396800 11197396800 0 +2000-01-03 00:00:00 18662356800 18662356800 0 +2000-01-04 00:00:00 26127316800 26127316800 0 +2000-01-05 00:00:00 33592276800 33592276800 0 +2000-01-06 00:00:00 41057236800 41057236800 0 +2000-01-07 00:00:00 48522196800 48522196800 0 +2000-01-08 00:00:00 55987156800 55987156800 0 +2000-01-09 00:00:00 63452116800 63452116800 0 +2000-01-10 00:00:00 70917076800 70917076800 0 +2000-01-11 00:00:00 78382036800 78382036800 0 +2000-01-12 00:00:00 85846996800 85846996800 0 +2000-01-13 00:00:00 93311956800 93311956800 0 +2000-01-14 00:00:00 100776916800 100776916800 0 +2000-01-15 00:00:00 108241876800 108241876800 0 +2000-01-16 00:00:00 115706836800 115706836800 0 +2000-01-17 00:00:00 123171796800 123171796800 0 +2000-01-18 00:00:00 130636756800 130636756800 0 +2000-01-19 00:00:00 138101716800 138101716800 0 +2000-01-20 00:00:00 145566676800 145566676800 0 +2000-01-21 00:00:00 153031636800 153031636800 0 +2000-01-22 00:00:00 160496596800 160496596800 0 +2000-01-23 00:00:00 167961556800 167961556800 0 +2000-01-24 00:00:00 175426516800 175426516800 0 +2000-01-25 00:00:00 182891476800 182891476800 0 +2000-01-26 00:00:00 190356436800 190356436800 0 +2000-01-27 00:00:00 197821396800 197821396800 0 +2000-01-28 00:00:00 205286356800 205286356800 0 +2000-01-29 00:00:00 212751316800 212751316800 0 +2000-01-30 00:00:00 220216276800 220216276800 0 +2000-01-31 00:00:00 227681236800 227681236800 0 +2000-02-01 00:00:00 235146196800 235146196800 0 +2000-02-02 00:00:00 242611156800 242611156800 0 +2000-02-03 00:00:00 250076116800 250076116800 0 +2000-02-04 00:00:00 257541076800 257541076800 0 +2000-02-05 00:00:00 265006036800 265006036800 0 +2000-02-06 00:00:00 272470996800 272470996800 0 +2000-02-07 00:00:00 279935956800 279935956800 0 +2000-02-08 
00:00:00 287400916800 287400916800 0 +2000-02-09 00:00:00 294865876800 294865876800 0 +2000-02-10 00:00:00 302330836800 302330836800 0 +2000-02-11 00:00:00 309795796800 309795796800 0 +2000-02-12 00:00:00 317260756800 317260756800 0 +2000-02-13 00:00:00 324725716800 324725716800 0 +2000-02-14 00:00:00 332190676800 332190676800 0 +2000-02-15 00:00:00 339655636800 339655636800 0 +2000-02-16 00:00:00 347120596800 347120596800 0 +2000-02-17 00:00:00 354585556800 354585556800 0 +2000-02-18 00:00:00 362050516800 362050516800 0 +2000-02-19 00:00:00 369515476800 369515476800 0 +2000-02-20 00:00:00 376980436800 376980436800 0 +2000-02-21 00:00:00 384445396800 384445396800 0 +2000-02-22 00:00:00 391910356800 391910356800 0 +2000-02-23 00:00:00 399375316800 399375316800 0 +2000-02-24 00:00:00 406840276800 406840276800 0 +2000-02-25 00:00:00 414305236800 414305236800 0 +2000-02-26 00:00:00 421770196800 421770196800 0 +2000-02-27 00:00:00 429235156800 429235156800 0 +2000-02-28 00:00:00 436700116800 436700116800 0 +2000-02-29 00:00:00 444165076800 444165076800 0 +2000-03-01 00:00:00 451630036800 451630036800 0 +2000-03-02 00:00:00 459094996800 459094996800 0 +2000-03-03 00:00:00 466559956800 466559956800 0 +2000-03-04 00:00:00 474024916800 474024916800 0 +2000-03-05 00:00:00 481489876800 481489876800 0 +2000-03-06 00:00:00 488954836800 488954836800 0 +2000-03-07 00:00:00 496419796800 496419796800 0 +2000-03-08 00:00:00 503884756800 503884756800 0 +2000-03-09 00:00:00 511349716800 511349716800 0 +2000-03-10 00:00:00 518814676800 518814676800 0 +2000-03-11 00:00:00 526279636800 526279636800 0 +2000-03-12 00:00:00 533744596800 533744596800 0 +2000-03-13 00:00:00 541209556800 541209556800 0 +2000-03-14 00:00:00 548674516800 548674516800 0 +2000-03-15 00:00:00 556139476800 556139476800 0 +2000-03-16 00:00:00 563604436800 563604436800 0 +2000-03-17 00:00:00 571069396800 571069396800 0 +2000-03-18 00:00:00 578534356800 578534356800 0 +2000-03-19 00:00:00 585999316800 585999316800 0 
+2000-03-20 00:00:00 593464276800 593464276800 0 +2000-03-21 00:00:00 600929236800 600929236800 0 +2000-03-22 00:00:00 608394196800 608394196800 0 +2000-03-23 00:00:00 615859156800 615859156800 0 +2000-03-24 00:00:00 623324116800 623324116800 0 +2000-03-25 00:00:00 630789076800 630789076800 0 +2000-03-26 00:00:00 638254036800 638254036800 0 +2000-03-27 00:00:00 645718996800 645718996800 0 +2000-03-28 00:00:00 653183956800 653183956800 0 +2000-03-29 00:00:00 660648916800 660648916800 0 +2000-03-30 00:00:00 668113876800 668113876800 0 +2000-03-31 00:00:00 675578836800 675578836800 0 +2000-04-01 00:00:00 683043796800 683043796800 0 +2000-04-02 00:00:00 690508756800 690508756800 0 +2000-04-03 00:00:00 697973716800 697973716800 0 +2000-04-04 00:00:00 705438676800 705438676800 0 +2000-04-05 00:00:00 712903636800 712903636800 0 +2000-04-06 00:00:00 720368596800 720368596800 0 +2000-04-07 00:00:00 727833556800 727833556800 0 +2000-04-08 00:00:00 735298516800 735298516800 0 +2000-04-09 00:00:00 742763476800 742763476800 0 +2000-04-10 00:00:00 750228436800 750228436800 0 +2000-04-11 00:00:00 757693396800 757693396800 0 +2000-04-12 00:00:00 765158356800 765158356800 0 +2000-04-13 00:00:00 772623316800 772623316800 0 +2000-04-14 00:00:00 780088276800 780088276800 0 +2000-04-15 00:00:00 787553236800 787553236800 0 +2000-04-16 00:00:00 795018196800 795018196800 0 +2000-04-17 00:00:00 802483156800 802483156800 0 +2000-04-18 00:00:00 809948116800 809948116800 0 +2000-04-19 00:00:00 817413076800 817413076800 0 +2000-04-20 00:00:00 824878036800 824878036800 0 +2000-04-21 00:00:00 832342996800 832342996800 0 +2000-04-22 00:00:00 839807956800 839807956800 0 +2000-04-23 00:00:00 847272916800 847272916800 0 +2000-04-24 00:00:00 854737876800 854737876800 0 +2000-04-25 00:00:00 637951968000 862202836800 224250868800 diff --git a/tests/queries/0_stateless/03174_split_parts_ranges_into_intersecting_and_non_intersecting_final_and_read-in-order_bug.sql 
b/tests/queries/0_stateless/03174_split_parts_ranges_into_intersecting_and_non_intersecting_final_and_read-in-order_bug.sql new file mode 100644 index 00000000000..c8da71b7f4d --- /dev/null +++ b/tests/queries/0_stateless/03174_split_parts_ranges_into_intersecting_and_non_intersecting_final_and_read-in-order_bug.sql @@ -0,0 +1,12 @@ +-- Tags: no-tsan, no-asan, no-msan, no-fasttest +-- Test is slow +create table tab (x DateTime('UTC'), y UInt32, v Int32) engine = ReplacingMergeTree(v) order by x; +insert into tab select toDateTime('2000-01-01', 'UTC') + number, number, 1 from numbers(1e7); +optimize table tab final; + +WITH (60 * 60) * 24 AS d +select toStartOfDay(x) as k, sum(y) as v, + (z + d) * (z + d - 1) / 2 - (toUInt64(k - toDateTime('2000-01-01', 'UTC')) as z) * (z - 1) / 2 as est, + est - v as delta +from tab final group by k order by k +settings max_threads=8, optimize_aggregation_in_order=1, split_parts_ranges_into_intersecting_and_non_intersecting_final=1; diff --git a/tests/queries/0_stateless/03195_group_concat_deserialization_fix.reference b/tests/queries/0_stateless/03195_group_concat_deserialization_fix.reference new file mode 100644 index 00000000000..1696fc46554 --- /dev/null +++ b/tests/queries/0_stateless/03195_group_concat_deserialization_fix.reference @@ -0,0 +1,3 @@ +First +First +Second diff --git a/tests/queries/0_stateless/03195_group_concat_deserialization_fix.sql b/tests/queries/0_stateless/03195_group_concat_deserialization_fix.sql new file mode 100644 index 00000000000..337f1f3db24 --- /dev/null +++ b/tests/queries/0_stateless/03195_group_concat_deserialization_fix.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS test_serialization; + +CREATE TABLE test_serialization +( + id UInt64, + text AggregateFunction(groupConcat, String) +) ENGINE = AggregatingMergeTree() ORDER BY id; + +INSERT INTO test_serialization SELECT + 1, + groupConcatState('First'); + +SELECT groupConcatMerge(text) AS concatenated_text FROM test_serialization GROUP BY id; + 
+INSERT INTO test_serialization SELECT + 2, + groupConcatState('Second'); + +SELECT groupConcatMerge(text) AS concatenated_text FROM test_serialization GROUP BY id ORDER BY id; + +DROP TABLE IF EXISTS test_serialization; + + diff --git a/tests/queries/0_stateless/03196_local_memory_limit.reference b/tests/queries/0_stateless/03196_local_memory_limit.reference new file mode 100644 index 00000000000..f2e22e8aa5b --- /dev/null +++ b/tests/queries/0_stateless/03196_local_memory_limit.reference @@ -0,0 +1 @@ +maximum: 95.37 MiB diff --git a/tests/queries/0_stateless/03196_local_memory_limit.sh b/tests/queries/0_stateless/03196_local_memory_limit.sh new file mode 100755 index 00000000000..346b37be006 --- /dev/null +++ b/tests/queries/0_stateless/03196_local_memory_limit.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --config-file <(echo "100M") --query "SELECT number FROM system.numbers GROUP BY number HAVING count() > 1" 2>&1 | grep -o -P 'maximum: [\d\.]+ MiB' diff --git a/tests/queries/0_stateless/03196_max_intersections_arena_crash.reference b/tests/queries/0_stateless/03196_max_intersections_arena_crash.reference new file mode 100644 index 00000000000..049e7107258 --- /dev/null +++ b/tests/queries/0_stateless/03196_max_intersections_arena_crash.reference @@ -0,0 +1,8 @@ +1 3 3 +1 6 3 +2 5 3 +3 7 3 +1 3 2 +1 6 2 +2 5 2 +3 7 2 diff --git a/tests/queries/0_stateless/03196_max_intersections_arena_crash.sql b/tests/queries/0_stateless/03196_max_intersections_arena_crash.sql new file mode 100644 index 00000000000..b7269d7c4e2 --- /dev/null +++ b/tests/queries/0_stateless/03196_max_intersections_arena_crash.sql @@ -0,0 +1,5 @@ +DROP TABLE IF EXISTS my_events; +CREATE TABLE my_events (start UInt32, end UInt32) Engine = MergeTree ORDER BY tuple() + AS Select * FROM VALUES ('start UInt32, end UInt32', (1, 3), (1, 6), (2, 5), (3, 
7)); +SELECT start, end, maxIntersections(start, end) OVER () FROM my_events; +SELECT start, end, maxIntersectionsPosition(start, end) OVER () FROM my_events; diff --git a/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference new file mode 100644 index 00000000000..bd9ab3be3fa --- /dev/null +++ b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.reference @@ -0,0 +1,2 @@ +2024-06-20 00:00:00 +2024-06-20 00:00:00 diff --git a/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql new file mode 100644 index 00000000000..e83738f7214 --- /dev/null +++ b/tests/queries/0_stateless/03197_fix_parse_mysql_iso_date.sql @@ -0,0 +1,2 @@ +SELECT parseDateTime('2024-06-20', '%F', 'UTC') AS x; +SELECT parseDateTime('06/20/24', '%D', 'UTC') AS x; diff --git a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql new file mode 100644 index 00000000000..5aa3e4c2e0c --- /dev/null +++ b/tests/queries/0_stateless/03197_storage_join_strictness_type_restriction.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, ALL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, INNER, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(SEMI, FULL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, ALL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, INNER, 
a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANTI, FULL, a); -- { serverError BAD_ARGUMENTS } + +CREATE TABLE t1 +( + a Int64, + b Int64 +) Engine = Join(ANY, FULL, a); -- { serverError NOT_IMPLEMENTED } diff --git a/tests/queries/0_stateless/03198_unload_primary_key_outdated.reference b/tests/queries/0_stateless/03198_unload_primary_key_outdated.reference new file mode 100644 index 00000000000..28655f938ba --- /dev/null +++ b/tests/queries/0_stateless/03198_unload_primary_key_outdated.reference @@ -0,0 +1,3 @@ +all_1_1_0 1 16 +all_1_1_0 0 0 +all_1_1_0_2 1 16 diff --git a/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh b/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh new file mode 100755 index 00000000000..4f217935123 --- /dev/null +++ b/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -n " + DROP TABLE IF EXISTS t_unload_primary_key; + + CREATE TABLE t_unload_primary_key (a UInt64, b UInt64) + ENGINE = MergeTree ORDER BY a + SETTINGS old_parts_lifetime = 10000; + + INSERT INTO t_unload_primary_key VALUES (1, 1); + + SELECT name, active, primary_key_bytes_in_memory FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_unload_primary_key' ORDER BY name; + + ALTER TABLE t_unload_primary_key UPDATE b = 100 WHERE 1 SETTINGS mutations_sync = 2; +" + +for _ in {1..100}; do + res=$($CLICKHOUSE_CLIENT -q "SELECT primary_key_bytes_in_memory FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_unload_primary_key' AND name = 'all_1_1_0'") + if [[ $res -eq 0 ]]; then + break + fi + sleep 0.3 +done + +$CLICKHOUSE_CLIENT -n " + SELECT name, active, primary_key_bytes_in_memory FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_unload_primary_key' ORDER BY name; + DROP TABLE IF EXISTS t_unload_primary_key; +" diff --git a/tests/queries/0_stateless/data_json/key_ignore_case.json b/tests/queries/0_stateless/data_json/key_ignore_case.json new file mode 100644 index 00000000000..ad8f7cb4507 Binary files /dev/null and b/tests/queries/0_stateless/data_json/key_ignore_case.json differ diff --git a/tests/queries/0_stateless/helpers/client.py b/tests/queries/0_stateless/helpers/client.py index 5c8589dfca1..ac0896f2e93 100644 --- a/tests/queries/0_stateless/helpers/client.py +++ b/tests/queries/0_stateless/helpers/client.py @@ -8,7 +8,7 @@ sys.path.insert(0, os.path.join(CURDIR)) import uexpect -prompt = ":\) " +prompt = ":\\) " end_of_block = r".*\r\n.*\r\n" @@ -21,7 +21,7 @@ class client(object): self.client.eol("\r") self.client.logger(log, prefix=name) self.client.timeout(120) - self.client.expect("[#\$] ", timeout=60) + self.client.expect("[#\\$] ", timeout=60) self.client.send(command) def __enter__(self): diff --git 
a/tests/queries/0_stateless/helpers/shell.py b/tests/queries/0_stateless/helpers/shell.py index befb3dcd543..c3fff61ffc9 100644 --- a/tests/queries/0_stateless/helpers/shell.py +++ b/tests/queries/0_stateless/helpers/shell.py @@ -10,7 +10,7 @@ import uexpect class shell(object): - def __init__(self, command=None, name="", log=None, prompt="[#\$] "): + def __init__(self, command=None, name="", log=None, prompt="[#\\$] "): if command is None: command = ["/bin/bash", "--noediting"] self.prompt = prompt diff --git a/tests/queries/0_stateless/mergetree_mutations.lib b/tests/queries/0_stateless/mergetree_mutations.lib index b11b2e6b852..9eeea87b52d 100644 --- a/tests/queries/0_stateless/mergetree_mutations.lib +++ b/tests/queries/0_stateless/mergetree_mutations.lib @@ -37,7 +37,7 @@ function wait_for_all_mutations() echo "Timed out while waiting for mutation to execute!" fi - sleep 0.1 + sleep 0.3 done } diff --git a/tests/queries/1_stateful/00166_explain_estimate.sql b/tests/queries/1_stateful/00166_explain_estimate.sql index c4071271736..136433c16ee 100644 --- a/tests/queries/1_stateful/00166_explain_estimate.sql +++ b/tests/queries/1_stateful/00166_explain_estimate.sql @@ -1,6 +1,8 @@ -- Tags: no-replicated-database -- Tag no-replicated-database: Requires investigation +SET optimize_use_implicit_projections = 0; + EXPLAIN ESTIMATE SELECT count() FROM test.hits WHERE CounterID = 29103473; EXPLAIN ESTIMATE SELECT count() FROM test.hits WHERE CounterID != 29103473; EXPLAIN ESTIMATE SELECT count() FROM test.hits WHERE CounterID > 29103473; diff --git a/tests/queries/1_stateful/00175_counting_resources_in_subqueries.sql b/tests/queries/1_stateful/00175_counting_resources_in_subqueries.sql index 63eca96414f..5733bf6dd18 100644 --- a/tests/queries/1_stateful/00175_counting_resources_in_subqueries.sql +++ b/tests/queries/1_stateful/00175_counting_resources_in_subqueries.sql @@ -1,3 +1,5 @@ +SET optimize_use_implicit_projections = 0; + -- the work for scalar subquery is 
properly accounted: SET max_rows_to_read = 1000000; SELECT 1 = (SELECT count() FROM test.hits WHERE NOT ignore(AdvEngineID)); -- { serverError TOO_MANY_ROWS } diff --git a/tests/sqllogic/test_parser.py b/tests/sqllogic/test_parser.py index c0abcaecd25..bd30674b23a 100755 --- a/tests/sqllogic/test_parser.py +++ b/tests/sqllogic/test_parser.py @@ -526,7 +526,7 @@ class QueryResult: for row in rows: res_row = [] for c, t in zip(row, types): - logger.debug("Builging row. c:%s t:%s", c, t) + logger.debug("Building row. c:%s t:%s", c, t) if c is None: res_row.append("NULL") continue diff --git a/utils/backup/backup b/utils/backup/backup new file mode 100755 index 00000000000..6aa9c179033 --- /dev/null +++ b/utils/backup/backup @@ -0,0 +1,47 @@ +#!/bin/bash + +user="default" +path="." + +usage() { + echo + echo "A trivial script to upload your files into ClickHouse." + echo "You might want to use something like Dropbox instead, but..." + echo + echo "Usage: $0 --host [--user ] --password " + exit 1 +} + +while [[ "$#" -gt 0 ]]; do + case "$1" in + --host) + host="$2" + shift 2 + ;; + --user) + user="$2" + shift 2 + ;; + --password) + password="$2" + shift 2 + ;; + --help) + usage + ;; + *) + path="$1" + shift 1 + ;; + esac +done + +if [ -z "$host" ] || [ -z "$password" ]; then + echo "Error: --host and --password are mandatory." 
+ usage +fi + +clickhouse-client --host "$host" --user "$user" --password "$password" --secure --query "CREATE TABLE IF NOT EXISTS default.files (time DEFAULT now(), path String, content String CODEC(ZSTD(6))) ENGINE = MergeTree ORDER BY (path, time)" && +find "$path" -type f | clickhouse-local --input-format LineAsString \ + --max-block-size 1 --min-insert-block-size-rows 0 --min-insert-block-size-bytes '100M' --max-insert-threads 1 \ + --query "INSERT INTO FUNCTION remoteSecure('$host', default.files, '$user', '$password') (path, content) SELECT line, file(line) FROM table" --progress diff --git a/utils/backupview/clickhouse_backupview.py b/utils/backupview/clickhouse_backupview.py index 4ba1f391d02..d1331e2ab49 100755 --- a/utils/backupview/clickhouse_backupview.py +++ b/utils/backupview/clickhouse_backupview.py @@ -8,6 +8,7 @@ import shutil import zipfile # For reading backups from zip archives import boto3 # For reading backups from S3 +import botocore ## Examples: diff --git a/utils/changelog/changelog.py b/utils/changelog/changelog.py index 314461a6b3a..b79e4139bcc 100755 --- a/utils/changelog/changelog.py +++ b/utils/changelog/changelog.py @@ -1,427 +1,15 @@ #!/usr/bin/env python3 # In our CI this script runs in style-test containers -import argparse -import logging -import os -import os.path as p -import re -from datetime import date, timedelta -from subprocess import DEVNULL, CalledProcessError -from typing import Dict, List, Optional, TextIO +# The main script is moved to tests/ci/changelog.py +# It depends on the ci scripts too hard to keep it here +# Here's only a wrapper around it for the people who used to it -from github.GithubException import RateLimitExceededException, UnknownObjectException -from github.NamedUser import NamedUser -from thefuzz.fuzz import ratio # type: ignore - -from git_helper import git_runner as runner -from git_helper import is_shallow -from github_helper import GitHub, PullRequest, PullRequests, Repository - -# This array 
gives the preferred category order, and is also used to -# normalize category names. -# Categories are used in .github/PULL_REQUEST_TEMPLATE.md, keep comments there -# updated accordingly -categories_preferred_order = ( - "Backward Incompatible Change", - "New Feature", - "Performance Improvement", - "Improvement", - "Critical Bug Fix", - "Bug Fix", - "Build/Testing/Packaging Improvement", - "Other", -) - -FROM_REF = "" -TO_REF = "" -SHA_IN_CHANGELOG = [] # type: List[str] -gh = GitHub(create_cache_dir=False) -CACHE_PATH = p.join(p.dirname(p.realpath(__file__)), "gh_cache") - - -class Description: - def __init__( - self, number: int, user: NamedUser, html_url: str, entry: str, category: str - ): - self.number = number - self.html_url = html_url - self.user = gh.get_user_cached(user._rawData["login"]) # type: ignore - self.entry = entry - self.category = category - - @property - def formatted_entry(self) -> str: - # Substitute issue links. - # 1) issue number w/o markdown link - entry = re.sub( - r"([^[])#([0-9]{4,})", - r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)", - self.entry, - ) - # 2) issue URL w/o markdown link - # including #issuecomment-1 or #event-12 - entry = re.sub( - r"([^(])(https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})[-#a-z0-9]*)", - r"\1[#\3](\2)", - entry, - ) - # It's possible that we face a secondary rate limit. - # In this case we should sleep until we get it - while True: - try: - user_name = self.user.name if self.user.name else self.user.login - break - except UnknownObjectException: - user_name = self.user.login - break - except RateLimitExceededException: - gh.sleep_on_rate_limit() - return ( - f"* {entry} [#{self.number}]({self.html_url}) " - f"([{user_name}]({self.user.html_url}))." 
- ) - - # Sort PR descriptions by numbers - def __eq__(self, other) -> bool: - if not isinstance(self, type(other)): - return NotImplemented - return self.number == other.number - - def __lt__(self, other: "Description") -> bool: - return self.number < other.number - - -def get_descriptions(prs: PullRequests) -> Dict[str, List[Description]]: - descriptions = {} # type: Dict[str, List[Description]] - repos = {} # type: Dict[str, Repository] - for pr in prs: - # See https://github.com/PyGithub/PyGithub/issues/2202, - # obj._rawData doesn't spend additional API requests - # We'll save some requests - # pylint: disable=protected-access - repo_name = pr._rawData["base"]["repo"]["full_name"] - # pylint: enable=protected-access - if repo_name not in repos: - repos[repo_name] = pr.base.repo - in_changelog = False - merge_commit = pr.merge_commit_sha - if merge_commit is None: - logging.warning("PR %s does not have merge-commit, skipping", pr.number) - continue - - in_changelog = merge_commit in SHA_IN_CHANGELOG - if in_changelog: - desc = generate_description(pr, repos[repo_name]) - if desc: - if desc.category not in descriptions: - descriptions[desc.category] = [] - descriptions[desc.category].append(desc) - - for descs in descriptions.values(): - descs.sort() - - return descriptions - - -def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description="Generate a changelog in Markdown format between given tags. 
" - "It fetches all tags and unshallow the git repository automatically", - ) - parser.add_argument( - "-v", - "--verbose", - action="count", - default=0, - help="set the script verbosity, could be used multiple", - ) - parser.add_argument( - "--debug-helpers", - action="store_true", - help="add debug logging for git_helper and github_helper", - ) - parser.add_argument( - "--output", - type=argparse.FileType("w"), - default="-", - help="output file for changelog", - ) - parser.add_argument( - "--repo", - default="ClickHouse/ClickHouse", - help="a repository to query for pull-requests from GitHub", - ) - parser.add_argument( - "--jobs", - type=int, - default=10, - help="number of jobs to get pull-requests info from GitHub API", - ) - parser.add_argument( - "--gh-user-or-token", - help="user name or GH token to authenticate", - ) - parser.add_argument( - "--gh-password", - help="a password that should be used when user is given", - ) - parser.add_argument( - "--with-testing-tags", - action="store_true", - help="by default '*-testing' tags are ignored, this argument enables them too", - ) - parser.add_argument( - "--from", - dest="from_ref", - help="git ref for a starting point of changelog, by default is calculated " - "automatically to match a previous tag in history", - ) - parser.add_argument( - "to_ref", - metavar="TO_REF", - help="git ref for the changelog end", - ) - args = parser.parse_args() - return args - - -# This function mirrors the PR description checks in ClickhousePullRequestTrigger. -# Returns None if the PR should not be mentioned in changelog. 
-def generate_description(item: PullRequest, repo: Repository) -> Optional[Description]: - backport_number = item.number - if item.head.ref.startswith("backport/"): - branch_parts = item.head.ref.split("/") - if len(branch_parts) == 3: - try: - item = gh.get_pull_cached(repo, int(branch_parts[-1])) - except Exception as e: - logging.warning("unable to get backpoted PR, exception: %s", e) - else: - logging.warning( - "The branch %s doesn't match backport template, using PR %s as is", - item.head.ref, - item.number, - ) - description = item.body - # Don't skip empty lines because they delimit parts of description - lines = [x.strip() for x in (description.split("\n") if description else [])] - lines = [re.sub(r"\s+", " ", ln) for ln in lines] - - category = "" - entry = "" - - if lines: - i = 0 - while i < len(lines): - if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]): - i += 1 - if i >= len(lines): - break - # Can have one empty line between header and the category itself. - # Filter it out. - if not lines[i]: - i += 1 - if i >= len(lines): - break - category = re.sub(r"^[-*\s]*", "", lines[i]) - i += 1 - elif re.match( - r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] - ): - i += 1 - # Can have one empty line between header and the entry itself. - # Filter it out. - if i < len(lines) and not lines[i]: - i += 1 - # All following lines until empty one are the changelog entry. - entry_lines = [] - while i < len(lines) and lines[i]: - entry_lines.append(lines[i]) - i += 1 - entry = " ".join(entry_lines) - else: - i += 1 - - # Remove excessive bullets from the entry. - if re.match(r"^[\-\*] ", entry): - entry = entry[2:] - - # Better style. - if re.match(r"^[a-z]", entry): - entry = entry.capitalize() - - if not category: - # Shouldn't happen, because description check in CI should catch such PRs. - # Fall through, so that it shows up in output and the user can fix it. 
- category = "NO CL CATEGORY" - - # Filter out the PR categories that are not for changelog. - if re.match( - r"(?i)((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", - category, - ): - category = "NOT FOR CHANGELOG / INSIGNIFICANT" - return Description(item.number, item.user, item.html_url, item.title, category) - - # Normalize bug fixes - if re.match( - r"(?i)bug\Wfix", - category, - ): - category = "Bug Fix (user-visible misbehavior in an official stable release)" - - # Filter out documentations changelog - if re.match( - r"(?i)doc", - category, - ): - return None - - if backport_number != item.number: - entry = f"Backported in #{backport_number}: {entry}" - - if not entry: - # Shouldn't happen, because description check in CI should catch such PRs. - category = "NO CL ENTRY" - entry = "NO CL ENTRY: '" + item.title + "'" - - entry = entry.strip() - if entry[-1] != ".": - entry += "." - - for c in categories_preferred_order: - if ratio(category.lower(), c.lower()) >= 90: - category = c - break - - return Description(item.number, item.user, item.html_url, entry, category) - - -def write_changelog( - fd: TextIO, descriptions: Dict[str, List[Description]], year: int -) -> None: - to_commit = runner(f"git rev-parse {TO_REF}^{{}}")[:11] - from_commit = runner(f"git rev-parse {FROM_REF}^{{}}")[:11] - fd.write( - f"---\nsidebar_position: 1\nsidebar_label: {year}\n---\n\n" - f"# {year} Changelog\n\n" - f"### ClickHouse release {TO_REF} ({to_commit}) FIXME " - f"as compared to {FROM_REF} ({from_commit})\n\n" - ) - - seen_categories = [] # type: List[str] - for category in categories_preferred_order: - if category in descriptions: - seen_categories.append(category) - fd.write(f"#### {category}\n") - for desc in descriptions[category]: - fd.write(f"{desc.formatted_entry}\n") - - fd.write("\n") - - for category in sorted(descriptions): - if category not in seen_categories: - fd.write(f"#### {category}\n\n") - for desc in descriptions[category]: - 
fd.write(f"{desc.formatted_entry}\n") - - fd.write("\n") - - -def check_refs(from_ref: Optional[str], to_ref: str, with_testing_tags: bool): - global FROM_REF, TO_REF - TO_REF = to_ref - - # Check TO_REF - runner.run(f"git rev-parse {TO_REF}") - - # Check from_ref - if from_ref is None: - # Get all tags pointing to TO_REF - tags = runner.run(f"git tag --points-at '{TO_REF}^{{}}'").split("\n") - logging.info("All tags pointing to %s:\n%s", TO_REF, tags) - if not with_testing_tags: - tags.append("*-testing") - exclude = " ".join([f"--exclude='{tag}'" for tag in tags]) - cmd = f"git describe --abbrev=0 --tags {exclude} '{TO_REF}'" - FROM_REF = runner.run(cmd) - else: - runner.run(f"git rev-parse {FROM_REF}") - FROM_REF = from_ref - - -def set_sha_in_changelog(): - global SHA_IN_CHANGELOG - SHA_IN_CHANGELOG = runner.run( - f"git log --format=format:%H {FROM_REF}..{TO_REF}" - ).split("\n") - - -def get_year(prs: PullRequests) -> int: - if not prs: - return date.today().year - return max(pr.created_at.year for pr in prs) - - -def main(): - log_levels = [logging.WARN, logging.INFO, logging.DEBUG] - args = parse_args() - logging.basicConfig( - format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d]:\n%(message)s", - level=log_levels[min(args.verbose, 2)], - ) - if args.debug_helpers: - logging.getLogger("github_helper").setLevel(logging.DEBUG) - logging.getLogger("git_helper").setLevel(logging.DEBUG) - # Create a cache directory - if not p.isdir(CACHE_PATH): - os.mkdir(CACHE_PATH, 0o700) - - # Get the full repo - if is_shallow(): - logging.info("Unshallow repository") - runner.run("git fetch --unshallow", stderr=DEVNULL) - logging.info("Fetching all tags") - runner.run("git fetch --tags", stderr=DEVNULL) - - check_refs(args.from_ref, args.to_ref, args.with_testing_tags) - set_sha_in_changelog() - - logging.info("Using %s..%s as changelog interval", FROM_REF, TO_REF) - - # use merge-base commit as a starting point, if used ref in another branch - base_commit = 
runner.run(f"git merge-base '{FROM_REF}^{{}}' '{TO_REF}^{{}}'") - # Get starting and ending dates for gathering PRs - # Add one day after and before to mitigate TZ possible issues - # `tag^{}` format gives commit ref when we have annotated tags - # format %cs gives a committer date, works better for cherry-picked commits - from_date = runner.run(f"git log -1 --format=format:%cs '{base_commit}'") - to_date = runner.run(f"git log -1 --format=format:%cs '{TO_REF}^{{}}'") - merged = ( - date.fromisoformat(from_date) - timedelta(1), - date.fromisoformat(to_date) + timedelta(1), - ) - - # Get all PRs for the given time frame - global gh - gh = GitHub( - args.gh_user_or_token, - args.gh_password, - create_cache_dir=False, - per_page=100, - pool_size=args.jobs, - ) - gh.cache_path = CACHE_PATH - query = f"type:pr repo:{args.repo} is:merged" - prs = gh.get_pulls_from_search(query=query, merged=merged, sort="created") - - descriptions = get_descriptions(prs) - changelog_year = get_year(prs) - - write_changelog(args.output, descriptions, changelog_year) +import subprocess +import sys +from pathlib import Path +SCRIPT_PATH = (Path(__file__).parents[2] / "tests/ci/changelog.py").absolute() if __name__ == "__main__": - main() + subprocess.check_call(["python3", SCRIPT_PATH, *sys.argv[1:]]) diff --git a/utils/changelog/git_helper.py b/utils/changelog/git_helper.py deleted file mode 120000 index 03b05a7eddd..00000000000 --- a/utils/changelog/git_helper.py +++ /dev/null @@ -1 +0,0 @@ -../../tests/ci/git_helper.py \ No newline at end of file diff --git a/utils/changelog/github_helper.py b/utils/changelog/github_helper.py deleted file mode 120000 index 2d44dfe8000..00000000000 --- a/utils/changelog/github_helper.py +++ /dev/null @@ -1 +0,0 @@ -../../tests/ci/github_helper.py \ No newline at end of file diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index c35e860a5d7..68734ef5ec8 100644 --- 
a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -473,6 +473,7 @@ Liao LibFuzzer LightHouse LineAsString +LineString Linf LinfDistance LinfNorm @@ -571,6 +572,7 @@ MySQLDump MySQLThreads NATS NCHAR +NDJSON NEKUDOTAYIM NEWDATE NEWDECIMAL @@ -713,6 +715,8 @@ PlantUML PointDistKm PointDistM PointDistRads +PostHistory +PostLink PostgreSQLConnection PostgreSQLThreads Postgres @@ -960,6 +964,7 @@ ToGeoBoundary ToIPv ToParent ToSnowflake +ToSnowflakeID ToString ToUnicode Toolset @@ -987,6 +992,8 @@ UPDATEs URIs URL URL's +URLDecode +URLEncode URLHash URLHierarchy URLPathHierarchy @@ -1213,6 +1220,7 @@ basename bcrypt benchmarking bfloat +bigrams binlog bitAnd bitCount @@ -1453,6 +1461,7 @@ datatypes dateName dateTime dateTimeToSnowflake +dateTimeToSnowflakeID datetime datetimes dayofyear @@ -1461,6 +1470,7 @@ dbeaver dbgen dbms ddl +deallocated deallocation deallocations debian @@ -1503,6 +1513,8 @@ detectCharset detectLanguage detectLanguageMixed detectLanguageUnknown +detectProgrammingLanguage +detectTonality determinator deterministically dictGet @@ -1518,6 +1530,7 @@ dictIsIn disableProtocols disjunction disjunctions +displayName displaySecretsInShowAndSelect distro divideDecimal @@ -1536,6 +1549,7 @@ dumpColumnStructure durations ecto editDistance +editDistanceUTF embeddings emptyArray emptyArrayDate @@ -1567,6 +1581,10 @@ evalMLMethod exFAT expiryMsec exponentialMovingAverage +exponentialTimeDecayedAvg +exponentialTimeDecayedCount +exponentialTimeDecayedMax +exponentialTimeDecayedSum exponentialmovingaverage expr exprN @@ -1604,6 +1622,7 @@ firstSignificantSubdomainCustom firstSignificantSubdomainCustomRFC firstSignificantSubdomainRFC fixedstring +flameGraph flamegraph flatbuffers flattenTuple @@ -1787,6 +1806,7 @@ indexHint indexOf infi inflight +infty initcap initcapUTF initialQueryID @@ -1893,6 +1913,7 @@ lessOrEquals lessorequals levenshtein levenshteinDistance +levenshteinDistanceUTF 
lexicographically lgamma libFuzzer @@ -1933,6 +1954,8 @@ loghouse london lookups loongarch +lowCardinalityIndices +lowCardinalityKeys lowcardinality lowerUTF lowercased @@ -2000,6 +2023,8 @@ minmax mins misconfiguration mispredictions +mlock +mlockall mmap mmapped modularization @@ -2241,6 +2266,7 @@ proportionsZTest proto protobuf protobufsingle +protocol proxied pseudorandom pseudorandomize @@ -2308,6 +2334,7 @@ randomStringUTF rankCorr rapidjson rawblob +readWKTLineString readWKTMultiPolygon readWKTPoint readWKTPolygon @@ -2466,6 +2493,7 @@ skewpop skewsamp skippingerrors sleepEachRow +snowflakeIDToDateTime snowflakeToDateTime socketcache soundex @@ -2490,6 +2518,7 @@ sqlite sqrt src srcReplicas +stackoverflow stacktrace stacktraces startsWith @@ -2729,6 +2758,9 @@ topLevelDomain topLevelDomainRFC topk topkweighted +transactionID +transactionLatestSnapshot +transactionOldestSnapshot transactional transactionally translateUTF @@ -2782,6 +2814,7 @@ unescaping unhex unicode unidimensional +unigrams unintuitive uniq uniqCombined @@ -2828,6 +2861,7 @@ userver utils uuid uuidv +vCPU varPop varPopStable varSamp @@ -2837,7 +2871,9 @@ variantElement variantType varint varpop +varpopstable varsamp +varsampstable vectorized vectorscan vendoring diff --git a/utils/check-style/check-flake8 b/utils/check-style/check-flake8 new file mode 100755 index 00000000000..58dd8a99d40 --- /dev/null +++ b/utils/check-style/check-flake8 @@ -0,0 +1,55 @@ +#!/usr/bin/env bash + +function join_by() { local IFS="$1"; shift; echo "$*"; } + +set -e + +# We check only our code, that's why we skip contrib +GIT_ROOT=$(git rev-parse --show-cdup) +GIT_ROOT=${GIT_ROOT:-./} + +# Find all *.py, *.python files and executable files without extension +# that are determined as python scripts by 'file' util +# in the repo except the contrib directory. 
+find_cmd=( + find "$GIT_ROOT" -type f -not -path "${GIT_ROOT}contrib/*" + \( + \( + -name '*.py' -or -name "*.python" -or + \( + -executable -not -name "*.*" -exec sh -c 'file {} | grep -q "Python script"' \; + \) + \) + # We skip modules generated by the protocol buffer compiler from *.proto files. + -and -not -name '*_pb2.py' -and -not -name '*_pb2_grpc.py' + \) -print0 +) + +ignores=( + E101 # Indentation contains mixed spaces and tabs + E203 # Whitespace before ':' + E226 # missing whitespace around arithmetic operator + E266 # Too many leading '#' for block comment + E401 # Multiple imports on one line + E402 # Module level import not at top of file + E501 # line too long + E711 # Comparison to None should be 'cond is None:' + E712 # Comparison to true should be 'if cond is true:' or 'if cond:' + E713 # Test for membership should be 'not in' + E714 # Test for object identity should be 'is not' + E722 # Do not use bare except, specify exception instead + E731 # Do not assign a lambda expression, use a def + E741 # Do not use variables named 'I', 'O', or 'l' + F401 # Module imported but unused + F403 # 'from module import *' used; unable to detect undefined names + F405 # Name may be undefined, or defined from star imports: module + F522 # .format(...) 
unused named arguments + F541 # f-string without any placeholders + F811 # redefinition of unused name from line N + F841 # local variable name is assigned to but never used + W191 # Indentation contains tabs + W291 # Trailing whitespace + W293 # Blank line contains whitespace + W503 # Line break occurred before a binary operator +) +"${find_cmd[@]}" | xargs -0 flake8 --ignore "$(join_by , "${ignores[@]}")" diff --git a/utils/check-style/check-mypy b/utils/check-style/check-mypy index 42cb7fbbd15..4434377e627 100755 --- a/utils/check-style/check-mypy +++ b/utils/check-style/check-mypy @@ -11,13 +11,15 @@ GIT_ROOT=${GIT_ROOT:-.} CONFIG="$GIT_ROOT/tests/ci/.mypy.ini" DIRS=("$GIT_ROOT/tests/ci/" "$GIT_ROOT/tests/ci/"*/) tmp=$(mktemp) + for dir in "${DIRS[@]}"; do if ! compgen -G "$dir"/*.py > /dev/null; then continue fi - if ! mypy --config-file="$CONFIG" --sqlite-cache "$dir"/*.py > "$tmp" 2>&1; then + if ! mypy --config-file="$CONFIG" --sqlite-cache $(find "$dir" -maxdepth 1 -name "*.py" | grep -v "test_") > "$tmp" 2>&1; then echo "Errors while processing $dir": cat "$tmp" fi done + rm -rf "$tmp" diff --git a/utils/check-style/check-pylint b/utils/check-style/check-pylint index 7959a414023..8cfbc68ac96 100755 --- a/utils/check-style/check-pylint +++ b/utils/check-style/check-pylint @@ -10,6 +10,7 @@ function xargs-pylint { xargs -P "$(nproc)" -n "$1" pylint --rcfile="$ROOT_PATH/pyproject.toml" --persistent=no --score=n } -find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 50 +# exclude ci unittest scripts from check: test_* +find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | grep -v "/test_" | xargs-pylint 50 # Beware, there lambdas are checked. 
All of them contain `app`, and it causes brain-cucumber-zalgo find "$ROOT_PATH/tests/ci" -mindepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 1 diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 5c05907e9dd..380656cd1ca 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -65,6 +65,7 @@ EXTERN_TYPES_EXCLUDES=( ProfileEvents::increment ProfileEvents::incrementForLogMessage ProfileEvents::getName + ProfileEvents::Timer ProfileEvents::Type ProfileEvents::TypeEnum ProfileEvents::dumpToMapColumn @@ -92,6 +93,7 @@ EXTERN_TYPES_EXCLUDES=( ErrorCodes::values ErrorCodes::values[i] ErrorCodes::getErrorCodeByName + ErrorCodes::Value ) for extern_type in ${!EXTERN_TYPES[@]}; do type_of_extern=${EXTERN_TYPES[$extern_type]} @@ -240,8 +242,22 @@ for test_case in "${tests_with_replicated_merge_tree[@]}"; do esac done -# All the submodules should be from https://github.com/ -find $ROOT_PATH -name '.gitmodules' | while read i; do grep -F 'url = ' $i | grep -v -F 'https://github.com/' && echo 'All the submodules should be from https://github.com/'; done +# All submodules should be from https://github.com/ +git config --file "$ROOT_PATH/.gitmodules" --get-regexp 'submodule\..+\.url' | \ +while read -r line; do + name=${line#submodule.}; name=${name%.url*} + url=${line#* } + [[ "$url" != 'https://github.com/'* ]] && echo "All submodules should be from https://github.com/, submodule '$name' has '$url'" +done + +# All submodules should be of this form: [submodule "contrib/libxyz"] (for consistency, the submodule name does matter too much) +# - restrict the check to top-level .gitmodules file +git config --file "$ROOT_PATH/.gitmodules" --get-regexp 'submodule\..+\.path' | \ +while read -r line; do + name=${line#submodule.}; name=${name%.path*} + path=${line#* } + [ "$name" != "$path" ] && echo "Submodule name '$name' is not equal to it's path '$path'" +done # There shouldn't be any code snippets under GPL or LGPL 
find $ROOT_PATH/{src,base,programs} -name '*.h' -or -name '*.cpp' 2>/dev/null | xargs grep -i -F 'General Public License' && echo "There shouldn't be any code snippets under GPL or LGPL" @@ -306,9 +322,14 @@ std_cerr_cout_excludes=( src/Client/LineReader.cpp src/Client/QueryFuzzer.cpp src/Client/Suggest.cpp + src/Client/ClientBase.h + src/Client/LineReader.h + src/Client/ReplxxLineReader.h src/Bridge/IBridge.cpp src/Daemon/BaseDaemon.cpp src/Loggers/Loggers.cpp + src/Common/GWPAsan.cpp + src/Common/ProgressIndication.h ) sources_with_std_cerr_cout=( $( find $ROOT_PATH/{src,base} -name '*.h' -or -name '*.cpp' | \ diff --git a/utils/check-style/process_style_check_result.py b/utils/check-style/process_style_check_result.py index e603084732d..2c349114a59 100755 --- a/utils/check-style/process_style_check_result.py +++ b/utils/check-style/process_style_check_result.py @@ -18,6 +18,7 @@ def process_result(result_folder): "style", "pylint", "black", + "flake8", "mypy", "typos", "whitespaces", diff --git a/utils/grpc-client/pb2/clickhouse_grpc_pb2.py b/utils/grpc-client/pb2/clickhouse_grpc_pb2.py index 6218047af3c..1e2c63012f3 100644 --- a/utils/grpc-client/pb2/clickhouse_grpc_pb2.py +++ b/utils/grpc-client/pb2/clickhouse_grpc_pb2.py @@ -1,13 +1,12 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: clickhouse_grpc.proto +# Protobuf Python Version: 4.25.3 """Generated protocol buffer code.""" -from google.protobuf.internal import enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() @@ -15,149 +14,45 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15\x63lickhouse_grpc.proto\x12\x0f\x63lickhouse.grpc\")\n\x0bNameAndType\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\"\xf5\x01\n\rExternalTable\x12\x0c\n\x04name\x18\x01 \x01(\t\x12-\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\x0e\n\x06\x66ormat\x18\x04 \x01(\t\x12\x18\n\x10\x63ompression_type\x18\x06 \x01(\t\x12>\n\x08settings\x18\x05 \x03(\x0b\x32,.clickhouse.grpc.ExternalTable.SettingsEntry\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x85\x03\n\x1cObsoleteTransportCompression\x12U\n\talgorithm\x18\x01 \x01(\x0e\x32\x42.clickhouse.grpc.ObsoleteTransportCompression.CompressionAlgorithm\x12M\n\x05level\x18\x02 \x01(\x0e\x32>.clickhouse.grpc.ObsoleteTransportCompression.CompressionLevel\"R\n\x14\x43ompressionAlgorithm\x12\x12\n\x0eNO_COMPRESSION\x10\x00\x12\x0b\n\x07\x44\x45\x46LATE\x10\x01\x12\x08\n\x04GZIP\x10\x02\x12\x0f\n\x0bSTREAM_GZIP\x10\x03\"k\n\x10\x43ompressionLevel\x12\x14\n\x10\x43OMPRESSION_NONE\x10\x00\x12\x13\n\x0f\x43OMPRESSION_LOW\x10\x01\x12\x16\n\x12\x43OMPRESSION_MEDIUM\x10\x02\x12\x14\n\x10\x43OMPRESSION_HIGH\x10\x03\"\x8e\x06\n\tQueryInfo\x12\r\n\x05query\x18\x01 \x01(\t\x12\x10\n\x08query_id\x18\x02 
\x01(\t\x12:\n\x08settings\x18\x03 \x03(\x0b\x32(.clickhouse.grpc.QueryInfo.SettingsEntry\x12\x10\n\x08\x64\x61tabase\x18\x04 \x01(\t\x12\x12\n\ninput_data\x18\x05 \x01(\x0c\x12\x1c\n\x14input_data_delimiter\x18\x06 \x01(\x0c\x12\x15\n\routput_format\x18\x07 \x01(\t\x12\x1b\n\x13send_output_columns\x18\x18 \x01(\x08\x12\x37\n\x0f\x65xternal_tables\x18\x08 \x03(\x0b\x32\x1e.clickhouse.grpc.ExternalTable\x12\x11\n\tuser_name\x18\t \x01(\t\x12\x10\n\x08password\x18\n \x01(\t\x12\r\n\x05quota\x18\x0b \x01(\t\x12\x12\n\nsession_id\x18\x0c \x01(\t\x12\x15\n\rsession_check\x18\r \x01(\x08\x12\x17\n\x0fsession_timeout\x18\x0e \x01(\r\x12\x0e\n\x06\x63\x61ncel\x18\x0f \x01(\x08\x12\x17\n\x0fnext_query_info\x18\x10 \x01(\x08\x12\x1e\n\x16input_compression_type\x18\x14 \x01(\t\x12\x1f\n\x17output_compression_type\x18\x15 \x01(\t\x12 \n\x18output_compression_level\x18\x13 \x01(\x05\x12\"\n\x1atransport_compression_type\x18\x16 \x01(\t\x12#\n\x1btransport_compression_level\x18\x17 \x01(\x05\x12R\n\x1bobsolete_result_compression\x18\x11 \x01(\x0b\x32-.clickhouse.grpc.ObsoleteTransportCompression\x12!\n\x19obsolete_compression_type\x18\x12 \x01(\t\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xa1\x01\n\x08LogEntry\x12\x0c\n\x04time\x18\x01 \x01(\r\x12\x19\n\x11time_microseconds\x18\x02 \x01(\r\x12\x11\n\tthread_id\x18\x03 \x01(\x04\x12\x10\n\x08query_id\x18\x04 \x01(\t\x12)\n\x05level\x18\x05 \x01(\x0e\x32\x1a.clickhouse.grpc.LogsLevel\x12\x0e\n\x06source\x18\x06 \x01(\t\x12\x0c\n\x04text\x18\x07 \x01(\t\"z\n\x08Progress\x12\x11\n\tread_rows\x18\x01 \x01(\x04\x12\x12\n\nread_bytes\x18\x02 \x01(\x04\x12\x1a\n\x12total_rows_to_read\x18\x03 \x01(\x04\x12\x14\n\x0cwritten_rows\x18\x04 \x01(\x04\x12\x15\n\rwritten_bytes\x18\x05 \x01(\x04\"p\n\x05Stats\x12\x0c\n\x04rows\x18\x01 \x01(\x04\x12\x0e\n\x06\x62locks\x18\x02 \x01(\x04\x12\x17\n\x0f\x61llocated_bytes\x18\x03 \x01(\x04\x12\x15\n\rapplied_limit\x18\x04 
\x01(\x08\x12\x19\n\x11rows_before_limit\x18\x05 \x01(\x04\"R\n\tException\x12\x0c\n\x04\x63ode\x18\x01 \x01(\x05\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x14\n\x0c\x64isplay_text\x18\x03 \x01(\t\x12\x13\n\x0bstack_trace\x18\x04 \x01(\t\"\xeb\x02\n\x06Result\x12\x10\n\x08query_id\x18\t \x01(\t\x12\x11\n\ttime_zone\x18\n \x01(\t\x12\x15\n\routput_format\x18\x0b \x01(\t\x12\x34\n\x0eoutput_columns\x18\x0c \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0e\n\x06output\x18\x01 \x01(\x0c\x12\x0e\n\x06totals\x18\x02 \x01(\x0c\x12\x10\n\x08\x65xtremes\x18\x03 \x01(\x0c\x12\'\n\x04logs\x18\x04 \x03(\x0b\x32\x19.clickhouse.grpc.LogEntry\x12+\n\x08progress\x18\x05 \x01(\x0b\x32\x19.clickhouse.grpc.Progress\x12%\n\x05stats\x18\x06 \x01(\x0b\x32\x16.clickhouse.grpc.Stats\x12-\n\texception\x18\x07 \x01(\x0b\x32\x1a.clickhouse.grpc.Exception\x12\x11\n\tcancelled\x18\x08 \x01(\x08*\x9d\x01\n\tLogsLevel\x12\x0c\n\x08LOG_NONE\x10\x00\x12\r\n\tLOG_FATAL\x10\x01\x12\x10\n\x0cLOG_CRITICAL\x10\x02\x12\r\n\tLOG_ERROR\x10\x03\x12\x0f\n\x0bLOG_WARNING\x10\x04\x12\x0e\n\nLOG_NOTICE\x10\x05\x12\x13\n\x0fLOG_INFORMATION\x10\x06\x12\r\n\tLOG_DEBUG\x10\x07\x12\r\n\tLOG_TRACE\x10\x08\x32\xdb\x02\n\nClickHouse\x12\x45\n\x0c\x45xecuteQuery\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00\x12V\n\x1b\x45xecuteQueryWithStreamInput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00(\x01\x12W\n\x1c\x45xecuteQueryWithStreamOutput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00\x30\x01\x12U\n\x18\x45xecuteQueryWithStreamIO\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00(\x01\x30\x01\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15\x63lickhouse_grpc.proto\x12\x0f\x63lickhouse.grpc\")\n\x0bNameAndType\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\"\xf5\x01\n\rExternalTable\x12\x0c\n\x04name\x18\x01 \x01(\t\x12-\n\x07\x63olumns\x18\x02 
\x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12\x0e\n\x06\x66ormat\x18\x04 \x01(\t\x12\x18\n\x10\x63ompression_type\x18\x06 \x01(\t\x12>\n\x08settings\x18\x05 \x03(\x0b\x32,.clickhouse.grpc.ExternalTable.SettingsEntry\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x85\x03\n\x1cObsoleteTransportCompression\x12U\n\talgorithm\x18\x01 \x01(\x0e\x32\x42.clickhouse.grpc.ObsoleteTransportCompression.CompressionAlgorithm\x12M\n\x05level\x18\x02 \x01(\x0e\x32>.clickhouse.grpc.ObsoleteTransportCompression.CompressionLevel\"R\n\x14\x43ompressionAlgorithm\x12\x12\n\x0eNO_COMPRESSION\x10\x00\x12\x0b\n\x07\x44\x45\x46LATE\x10\x01\x12\x08\n\x04GZIP\x10\x02\x12\x0f\n\x0bSTREAM_GZIP\x10\x03\"k\n\x10\x43ompressionLevel\x12\x14\n\x10\x43OMPRESSION_NONE\x10\x00\x12\x13\n\x0f\x43OMPRESSION_LOW\x10\x01\x12\x16\n\x12\x43OMPRESSION_MEDIUM\x10\x02\x12\x14\n\x10\x43OMPRESSION_HIGH\x10\x03\"\x9b\x06\n\tQueryInfo\x12\r\n\x05query\x18\x01 \x01(\t\x12\x10\n\x08query_id\x18\x02 \x01(\t\x12:\n\x08settings\x18\x03 \x03(\x0b\x32(.clickhouse.grpc.QueryInfo.SettingsEntry\x12\x10\n\x08\x64\x61tabase\x18\x04 \x01(\t\x12\x12\n\ninput_data\x18\x05 \x01(\x0c\x12\x1c\n\x14input_data_delimiter\x18\x06 \x01(\x0c\x12\x15\n\routput_format\x18\x07 \x01(\t\x12\x1b\n\x13send_output_columns\x18\x18 \x01(\x08\x12\x37\n\x0f\x65xternal_tables\x18\x08 \x03(\x0b\x32\x1e.clickhouse.grpc.ExternalTable\x12\x11\n\tuser_name\x18\t \x01(\t\x12\x10\n\x08password\x18\n \x01(\t\x12\r\n\x05quota\x18\x0b \x01(\t\x12\x0b\n\x03jwt\x18\x19 \x01(\t\x12\x12\n\nsession_id\x18\x0c \x01(\t\x12\x15\n\rsession_check\x18\r \x01(\x08\x12\x17\n\x0fsession_timeout\x18\x0e \x01(\r\x12\x0e\n\x06\x63\x61ncel\x18\x0f \x01(\x08\x12\x17\n\x0fnext_query_info\x18\x10 \x01(\x08\x12\x1e\n\x16input_compression_type\x18\x14 \x01(\t\x12\x1f\n\x17output_compression_type\x18\x15 \x01(\t\x12 \n\x18output_compression_level\x18\x13 
\x01(\x05\x12\"\n\x1atransport_compression_type\x18\x16 \x01(\t\x12#\n\x1btransport_compression_level\x18\x17 \x01(\x05\x12R\n\x1bobsolete_result_compression\x18\x11 \x01(\x0b\x32-.clickhouse.grpc.ObsoleteTransportCompression\x12!\n\x19obsolete_compression_type\x18\x12 \x01(\t\x1a/\n\rSettingsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\xa1\x01\n\x08LogEntry\x12\x0c\n\x04time\x18\x01 \x01(\r\x12\x19\n\x11time_microseconds\x18\x02 \x01(\r\x12\x11\n\tthread_id\x18\x03 \x01(\x04\x12\x10\n\x08query_id\x18\x04 \x01(\t\x12)\n\x05level\x18\x05 \x01(\x0e\x32\x1a.clickhouse.grpc.LogsLevel\x12\x0e\n\x06source\x18\x06 \x01(\t\x12\x0c\n\x04text\x18\x07 \x01(\t\"z\n\x08Progress\x12\x11\n\tread_rows\x18\x01 \x01(\x04\x12\x12\n\nread_bytes\x18\x02 \x01(\x04\x12\x1a\n\x12total_rows_to_read\x18\x03 \x01(\x04\x12\x14\n\x0cwritten_rows\x18\x04 \x01(\x04\x12\x15\n\rwritten_bytes\x18\x05 \x01(\x04\"p\n\x05Stats\x12\x0c\n\x04rows\x18\x01 \x01(\x04\x12\x0e\n\x06\x62locks\x18\x02 \x01(\x04\x12\x17\n\x0f\x61llocated_bytes\x18\x03 \x01(\x04\x12\x15\n\rapplied_limit\x18\x04 \x01(\x08\x12\x19\n\x11rows_before_limit\x18\x05 \x01(\x04\"R\n\tException\x12\x0c\n\x04\x63ode\x18\x01 \x01(\x05\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x14\n\x0c\x64isplay_text\x18\x03 \x01(\t\x12\x13\n\x0bstack_trace\x18\x04 \x01(\t\"\xeb\x02\n\x06Result\x12\x10\n\x08query_id\x18\t \x01(\t\x12\x11\n\ttime_zone\x18\n \x01(\t\x12\x15\n\routput_format\x18\x0b \x01(\t\x12\x34\n\x0eoutput_columns\x18\x0c \x03(\x0b\x32\x1c.clickhouse.grpc.NameAndType\x12\x0e\n\x06output\x18\x01 \x01(\x0c\x12\x0e\n\x06totals\x18\x02 \x01(\x0c\x12\x10\n\x08\x65xtremes\x18\x03 \x01(\x0c\x12\'\n\x04logs\x18\x04 \x03(\x0b\x32\x19.clickhouse.grpc.LogEntry\x12+\n\x08progress\x18\x05 \x01(\x0b\x32\x19.clickhouse.grpc.Progress\x12%\n\x05stats\x18\x06 \x01(\x0b\x32\x16.clickhouse.grpc.Stats\x12-\n\texception\x18\x07 \x01(\x0b\x32\x1a.clickhouse.grpc.Exception\x12\x11\n\tcancelled\x18\x08 
\x01(\x08*\x9d\x01\n\tLogsLevel\x12\x0c\n\x08LOG_NONE\x10\x00\x12\r\n\tLOG_FATAL\x10\x01\x12\x10\n\x0cLOG_CRITICAL\x10\x02\x12\r\n\tLOG_ERROR\x10\x03\x12\x0f\n\x0bLOG_WARNING\x10\x04\x12\x0e\n\nLOG_NOTICE\x10\x05\x12\x13\n\x0fLOG_INFORMATION\x10\x06\x12\r\n\tLOG_DEBUG\x10\x07\x12\r\n\tLOG_TRACE\x10\x08\x32\xdb\x02\n\nClickHouse\x12\x45\n\x0c\x45xecuteQuery\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00\x12V\n\x1b\x45xecuteQueryWithStreamInput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00(\x01\x12W\n\x1c\x45xecuteQueryWithStreamOutput\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00\x30\x01\x12U\n\x18\x45xecuteQueryWithStreamIO\x12\x1a.clickhouse.grpc.QueryInfo\x1a\x17.clickhouse.grpc.Result\"\x00(\x01\x30\x01\x62\x06proto3') -_LOGSLEVEL = DESCRIPTOR.enum_types_by_name['LogsLevel'] -LogsLevel = enum_type_wrapper.EnumTypeWrapper(_LOGSLEVEL) -LOG_NONE = 0 -LOG_FATAL = 1 -LOG_CRITICAL = 2 -LOG_ERROR = 3 -LOG_WARNING = 4 -LOG_NOTICE = 5 -LOG_INFORMATION = 6 -LOG_DEBUG = 7 -LOG_TRACE = 8 - - -_NAMEANDTYPE = DESCRIPTOR.message_types_by_name['NameAndType'] -_EXTERNALTABLE = DESCRIPTOR.message_types_by_name['ExternalTable'] -_EXTERNALTABLE_SETTINGSENTRY = _EXTERNALTABLE.nested_types_by_name['SettingsEntry'] -_OBSOLETETRANSPORTCOMPRESSION = DESCRIPTOR.message_types_by_name['ObsoleteTransportCompression'] -_QUERYINFO = DESCRIPTOR.message_types_by_name['QueryInfo'] -_QUERYINFO_SETTINGSENTRY = _QUERYINFO.nested_types_by_name['SettingsEntry'] -_LOGENTRY = DESCRIPTOR.message_types_by_name['LogEntry'] -_PROGRESS = DESCRIPTOR.message_types_by_name['Progress'] -_STATS = DESCRIPTOR.message_types_by_name['Stats'] -_EXCEPTION = DESCRIPTOR.message_types_by_name['Exception'] -_RESULT = DESCRIPTOR.message_types_by_name['Result'] -_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM = _OBSOLETETRANSPORTCOMPRESSION.enum_types_by_name['CompressionAlgorithm'] -_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL = 
_OBSOLETETRANSPORTCOMPRESSION.enum_types_by_name['CompressionLevel'] -NameAndType = _reflection.GeneratedProtocolMessageType('NameAndType', (_message.Message,), { - 'DESCRIPTOR' : _NAMEANDTYPE, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.NameAndType) - }) -_sym_db.RegisterMessage(NameAndType) - -ExternalTable = _reflection.GeneratedProtocolMessageType('ExternalTable', (_message.Message,), { - - 'SettingsEntry' : _reflection.GeneratedProtocolMessageType('SettingsEntry', (_message.Message,), { - 'DESCRIPTOR' : _EXTERNALTABLE_SETTINGSENTRY, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.ExternalTable.SettingsEntry) - }) - , - 'DESCRIPTOR' : _EXTERNALTABLE, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.ExternalTable) - }) -_sym_db.RegisterMessage(ExternalTable) -_sym_db.RegisterMessage(ExternalTable.SettingsEntry) - -ObsoleteTransportCompression = _reflection.GeneratedProtocolMessageType('ObsoleteTransportCompression', (_message.Message,), { - 'DESCRIPTOR' : _OBSOLETETRANSPORTCOMPRESSION, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.ObsoleteTransportCompression) - }) -_sym_db.RegisterMessage(ObsoleteTransportCompression) - -QueryInfo = _reflection.GeneratedProtocolMessageType('QueryInfo', (_message.Message,), { - - 'SettingsEntry' : _reflection.GeneratedProtocolMessageType('SettingsEntry', (_message.Message,), { - 'DESCRIPTOR' : _QUERYINFO_SETTINGSENTRY, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.QueryInfo.SettingsEntry) - }) - , - 'DESCRIPTOR' : _QUERYINFO, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.QueryInfo) - }) -_sym_db.RegisterMessage(QueryInfo) -_sym_db.RegisterMessage(QueryInfo.SettingsEntry) - -LogEntry = _reflection.GeneratedProtocolMessageType('LogEntry', 
(_message.Message,), { - 'DESCRIPTOR' : _LOGENTRY, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.LogEntry) - }) -_sym_db.RegisterMessage(LogEntry) - -Progress = _reflection.GeneratedProtocolMessageType('Progress', (_message.Message,), { - 'DESCRIPTOR' : _PROGRESS, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.Progress) - }) -_sym_db.RegisterMessage(Progress) - -Stats = _reflection.GeneratedProtocolMessageType('Stats', (_message.Message,), { - 'DESCRIPTOR' : _STATS, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.Stats) - }) -_sym_db.RegisterMessage(Stats) - -Exception = _reflection.GeneratedProtocolMessageType('Exception', (_message.Message,), { - 'DESCRIPTOR' : _EXCEPTION, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.Exception) - }) -_sym_db.RegisterMessage(Exception) - -Result = _reflection.GeneratedProtocolMessageType('Result', (_message.Message,), { - 'DESCRIPTOR' : _RESULT, - '__module__' : 'clickhouse_grpc_pb2' - # @@protoc_insertion_point(class_scope:clickhouse.grpc.Result) - }) -_sym_db.RegisterMessage(Result) - -_CLICKHOUSE = DESCRIPTOR.services_by_name['ClickHouse'] +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'clickhouse_grpc_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None - _EXTERNALTABLE_SETTINGSENTRY._options = None - _EXTERNALTABLE_SETTINGSENTRY._serialized_options = b'8\001' - _QUERYINFO_SETTINGSENTRY._options = None - _QUERYINFO_SETTINGSENTRY._serialized_options = b'8\001' - _LOGSLEVEL._serialized_start=2363 - _LOGSLEVEL._serialized_end=2520 - _NAMEANDTYPE._serialized_start=42 - _NAMEANDTYPE._serialized_end=83 - _EXTERNALTABLE._serialized_start=86 - _EXTERNALTABLE._serialized_end=331 - 
_EXTERNALTABLE_SETTINGSENTRY._serialized_start=284 - _EXTERNALTABLE_SETTINGSENTRY._serialized_end=331 - _OBSOLETETRANSPORTCOMPRESSION._serialized_start=334 - _OBSOLETETRANSPORTCOMPRESSION._serialized_end=723 - _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM._serialized_start=532 - _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM._serialized_end=614 - _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL._serialized_start=616 - _OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL._serialized_end=723 - _QUERYINFO._serialized_start=726 - _QUERYINFO._serialized_end=1508 - _QUERYINFO_SETTINGSENTRY._serialized_start=284 - _QUERYINFO_SETTINGSENTRY._serialized_end=331 - _LOGENTRY._serialized_start=1511 - _LOGENTRY._serialized_end=1672 - _PROGRESS._serialized_start=1674 - _PROGRESS._serialized_end=1796 - _STATS._serialized_start=1798 - _STATS._serialized_end=1910 - _EXCEPTION._serialized_start=1912 - _EXCEPTION._serialized_end=1994 - _RESULT._serialized_start=1997 - _RESULT._serialized_end=2360 - _CLICKHOUSE._serialized_start=2523 - _CLICKHOUSE._serialized_end=2870 + _globals['_EXTERNALTABLE_SETTINGSENTRY']._options = None + _globals['_EXTERNALTABLE_SETTINGSENTRY']._serialized_options = b'8\001' + _globals['_QUERYINFO_SETTINGSENTRY']._options = None + _globals['_QUERYINFO_SETTINGSENTRY']._serialized_options = b'8\001' + _globals['_LOGSLEVEL']._serialized_start=2376 + _globals['_LOGSLEVEL']._serialized_end=2533 + _globals['_NAMEANDTYPE']._serialized_start=42 + _globals['_NAMEANDTYPE']._serialized_end=83 + _globals['_EXTERNALTABLE']._serialized_start=86 + _globals['_EXTERNALTABLE']._serialized_end=331 + _globals['_EXTERNALTABLE_SETTINGSENTRY']._serialized_start=284 + _globals['_EXTERNALTABLE_SETTINGSENTRY']._serialized_end=331 + _globals['_OBSOLETETRANSPORTCOMPRESSION']._serialized_start=334 + _globals['_OBSOLETETRANSPORTCOMPRESSION']._serialized_end=723 + _globals['_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM']._serialized_start=532 + 
_globals['_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONALGORITHM']._serialized_end=614 + _globals['_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL']._serialized_start=616 + _globals['_OBSOLETETRANSPORTCOMPRESSION_COMPRESSIONLEVEL']._serialized_end=723 + _globals['_QUERYINFO']._serialized_start=726 + _globals['_QUERYINFO']._serialized_end=1521 + _globals['_QUERYINFO_SETTINGSENTRY']._serialized_start=284 + _globals['_QUERYINFO_SETTINGSENTRY']._serialized_end=331 + _globals['_LOGENTRY']._serialized_start=1524 + _globals['_LOGENTRY']._serialized_end=1685 + _globals['_PROGRESS']._serialized_start=1687 + _globals['_PROGRESS']._serialized_end=1809 + _globals['_STATS']._serialized_start=1811 + _globals['_STATS']._serialized_end=1923 + _globals['_EXCEPTION']._serialized_start=1925 + _globals['_EXCEPTION']._serialized_end=2007 + _globals['_RESULT']._serialized_start=2010 + _globals['_RESULT']._serialized_end=2373 + _globals['_CLICKHOUSE']._serialized_start=2536 + _globals['_CLICKHOUSE']._serialized_end=2883 # @@protoc_insertion_point(module_scope) diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index ed7e09685f0..5ae4c7a0b1c 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1238,9 +1238,13 @@ void Runner::createConnections() std::shared_ptr Runner::getConnection(const ConnectionInfo & connection_info, size_t connection_info_idx) { - Coordination::ZooKeeper::Node node{Poco::Net::SocketAddress{connection_info.host}, static_cast(connection_info_idx), connection_info.secure}; - std::vector nodes; - nodes.push_back(node); + zkutil::ShuffleHost host; + host.host = connection_info.host; + host.secure = connection_info.secure; + host.original_index = static_cast(connection_info_idx); + host.address = Poco::Net::SocketAddress{connection_info.host}; + + zkutil::ShuffleHosts nodes{host}; zkutil::ZooKeeperArgs args; args.session_timeout_ms = connection_info.session_timeout_ms; args.connection_timeout_ms = 
connection_info.connection_timeout_ms; diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index f7d84cce4b1..8112ed9083b 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,5 +1,12 @@ +v24.6.1.4423-stable 2024-07-01 +v24.5.4.49-stable 2024-07-01 +v24.5.3.5-stable 2024-06-13 +v24.5.2.34-stable 2024-06-13 v24.5.1.1763-stable 2024-06-01 +v24.4.3.25-stable 2024-06-14 +v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.4.147-lts 2024-06-13 v24.3.3.102-lts 2024-05-01 v24.3.2.23-lts 2024-04-03 v24.3.1.2672-lts 2024-03-27 @@ -37,6 +44,7 @@ v23.9.4.11-stable 2023-11-08 v23.9.3.12-stable 2023-10-31 v23.9.2.56-stable 2023-10-19 v23.9.1.1854-stable 2023-09-29 +v23.8.15.35-lts 2024-06-14 v23.8.14.6-lts 2024-05-02 v23.8.13.25-lts 2024-04-26 v23.8.12.13-lts 2024-03-26 diff --git a/utils/zookeeper-cli/CMakeLists.txt b/utils/zookeeper-cli/CMakeLists.txt index cad7164b775..fd2fa669f40 100644 --- a/utils/zookeeper-cli/CMakeLists.txt +++ b/utils/zookeeper-cli/CMakeLists.txt @@ -3,4 +3,6 @@ clickhouse_add_executable(clickhouse-zookeeper-cli ${ClickHouse_SOURCE_DIR}/src/Client/LineReader.cpp) target_link_libraries(clickhouse-zookeeper-cli PRIVATE clickhouse_common_zookeeper_no_log - dbms) + dbms + clickhouse_functions +) diff --git a/utils/zookeeper-dump-tree/CMakeLists.txt b/utils/zookeeper-dump-tree/CMakeLists.txt index 85e4d18c19f..3f3df65776a 100644 --- a/utils/zookeeper-dump-tree/CMakeLists.txt +++ b/utils/zookeeper-dump-tree/CMakeLists.txt @@ -3,4 +3,5 @@ target_link_libraries(zookeeper-dump-tree PRIVATE clickhouse_common_zookeeper_no_log clickhouse_common_io dbms + clickhouse_functions boost::program_options) diff --git a/utils/zookeeper-remove-by-list/CMakeLists.txt b/utils/zookeeper-remove-by-list/CMakeLists.txt index 50aaed76110..a4d7dccef65 100644 --- a/utils/zookeeper-remove-by-list/CMakeLists.txt +++ b/utils/zookeeper-remove-by-list/CMakeLists.txt @@ -2,4 +2,5 @@ 
clickhouse_add_executable (zookeeper-remove-by-list main.cpp ${SRCS}) target_link_libraries(zookeeper-remove-by-list PRIVATE clickhouse_common_zookeeper_no_log dbms + clickhouse_functions boost::program_options)